author    Dimitry Andric <dim@FreeBSD.org>  2020-01-17 20:45:01 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2020-01-17 20:45:01 +0000
commit    706b4fc47bbc608932d3b491ae19a3b9cde9497b (patch)
tree      4adf86a776049cbf7f69a1929c4babcbbef925eb /llvm/lib/CodeGen
parent    7cc9cf2bf09f069cb2dd947ead05d0b54301fb71 (diff)
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- llvm/lib/CodeGen/Analysis.cpp | 27
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 144
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 72
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 32
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 2
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 10
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h | 38
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h | 8
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 52
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 5
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 359
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 14
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 32
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 4
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 13
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 28
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp | 61
-rw-r--r-- llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h | 9
-rw-r--r-- llvm/lib/CodeGen/AtomicExpandPass.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/BranchFolding.cpp | 174
-rw-r--r-- llvm/lib/CodeGen/BranchFolding.h | 4
-rw-r--r-- llvm/lib/CodeGen/BranchRelaxation.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/BreakFalseDeps.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/CFGuardLongjmp.cpp | 120
-rw-r--r-- llvm/lib/CodeGen/CFIInstrInserter.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/CodeGen.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/CodeGenPrepare.cpp | 246
-rw-r--r-- llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 16
-rw-r--r-- llvm/lib/CodeGen/DFAPacketizer.cpp | 62
-rw-r--r-- llvm/lib/CodeGen/DeadMachineInstructionElim.cpp | 10
-rw-r--r-- llvm/lib/CodeGen/DwarfEHPrepare.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/EarlyIfConversion.cpp | 11
-rw-r--r-- llvm/lib/CodeGen/EdgeBundles.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/ExpandMemCmp.cpp | 44
-rw-r--r-- llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/ExpandReductions.cpp | 46
-rw-r--r-- llvm/lib/CodeGen/FEntryInserter.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/FaultMaps.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/FinalizeISel.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/FuncletLayout.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/GCMetadata.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/GCRootLowering.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 7
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 6
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 172
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 74
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 70
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 131
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 248
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 16
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 13
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 19
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 15
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 22
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 17
-rw-r--r-- llvm/lib/CodeGen/GlobalMerge.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/HardwareLoops.cpp | 115
-rw-r--r-- llvm/lib/CodeGen/IfConversion.cpp | 11
-rw-r--r-- llvm/lib/CodeGen/ImplicitNullChecks.cpp | 5
-rw-r--r-- llvm/lib/CodeGen/IndirectBrExpandPass.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/InlineSpiller.cpp | 19
-rw-r--r-- llvm/lib/CodeGen/InterleavedAccessPass.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/IntrinsicLowering.cpp | 36
-rw-r--r-- llvm/lib/CodeGen/LLVMTargetMachine.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/LiveDebugValues.cpp | 659
-rw-r--r-- llvm/lib/CodeGen/LiveDebugVariables.cpp | 226
-rw-r--r-- llvm/lib/CodeGen/LiveInterval.cpp | 19
-rw-r--r-- llvm/lib/CodeGen/LiveIntervals.cpp | 23
-rw-r--r-- llvm/lib/CodeGen/LivePhysRegs.cpp | 30
-rw-r--r-- llvm/lib/CodeGen/LiveRangeShrink.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/LiveRegMatrix.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/LiveRegUnits.cpp | 45
-rw-r--r-- llvm/lib/CodeGen/LocalStackSlotAllocation.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/LowLevelType.cpp | 29
-rw-r--r-- llvm/lib/CodeGen/LowerEmuTLS.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/MIRCanonicalizerPass.cpp | 63
-rw-r--r-- llvm/lib/CodeGen/MIRNamerPass.cpp | 6
-rw-r--r-- llvm/lib/CodeGen/MIRParser/MILexer.cpp | 10
-rw-r--r-- llvm/lib/CodeGen/MIRParser/MILexer.h | 4
-rw-r--r-- llvm/lib/CodeGen/MIRParser/MIParser.cpp | 326
-rw-r--r-- llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 45
-rw-r--r-- llvm/lib/CodeGen/MIRPrinter.cpp | 35
-rw-r--r-- llvm/lib/CodeGen/MIRPrintingPass.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/MIRVRegNamerUtils.cpp | 424
-rw-r--r-- llvm/lib/CodeGen/MIRVRegNamerUtils.h | 98
-rw-r--r-- llvm/lib/CodeGen/MachineBasicBlock.cpp | 11
-rw-r--r-- llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 8
-rw-r--r-- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 84
-rw-r--r-- llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp | 8
-rw-r--r-- llvm/lib/CodeGen/MachineCSE.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/MachineCombiner.cpp | 24
-rw-r--r-- llvm/lib/CodeGen/MachineCopyPropagation.cpp | 236
-rw-r--r-- llvm/lib/CodeGen/MachineDominanceFrontier.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/MachineDominators.cpp | 7
-rw-r--r-- llvm/lib/CodeGen/MachineFrameInfo.cpp | 8
-rw-r--r-- llvm/lib/CodeGen/MachineFunction.cpp | 51
-rw-r--r-- llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/MachineInstr.cpp | 159
-rw-r--r-- llvm/lib/CodeGen/MachineInstrBundle.cpp | 31
-rw-r--r-- llvm/lib/CodeGen/MachineLICM.cpp | 63
-rw-r--r-- llvm/lib/CodeGen/MachineLoopInfo.cpp | 12
-rw-r--r-- llvm/lib/CodeGen/MachineLoopUtils.cpp | 12
-rw-r--r-- llvm/lib/CodeGen/MachineModuleInfo.cpp | 14
-rw-r--r-- llvm/lib/CodeGen/MachineOperand.cpp | 67
-rw-r--r-- llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/MachineOutliner.cpp | 148
-rw-r--r-- llvm/lib/CodeGen/MachinePipeliner.cpp | 5
-rw-r--r-- llvm/lib/CodeGen/MachinePostDominators.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/MachineRegionInfo.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/MachineScheduler.cpp | 68
-rw-r--r-- llvm/lib/CodeGen/MachineSink.cpp | 208
-rw-r--r-- llvm/lib/CodeGen/MachineSizeOpts.cpp | 122
-rw-r--r-- llvm/lib/CodeGen/MachineTraceMetrics.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/MachineVerifier.cpp | 93
-rw-r--r-- llvm/lib/CodeGen/MacroFusion.cpp | 27
-rw-r--r-- llvm/lib/CodeGen/ModuloSchedule.cpp | 217
-rw-r--r-- llvm/lib/CodeGen/NonRelocatableStringpool.cpp | 54
-rw-r--r-- llvm/lib/CodeGen/OptimizePHIs.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/ParallelCG.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/PatchableFunction.cpp | 10
-rw-r--r-- llvm/lib/CodeGen/PeepholeOptimizer.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/PostRAHazardRecognizer.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/PostRASchedulerList.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 29
-rw-r--r-- llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/PrologEpilogInserter.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 141
-rw-r--r-- llvm/lib/CodeGen/RegAllocFast.cpp | 141
-rw-r--r-- llvm/lib/CodeGen/RegAllocGreedy.cpp | 11
-rw-r--r-- llvm/lib/CodeGen/RegUsageInfoCollector.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/RegisterClassInfo.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/RegisterCoalescer.cpp | 227
-rw-r--r-- llvm/lib/CodeGen/RegisterScavenging.cpp | 5
-rw-r--r-- llvm/lib/CodeGen/RenameIndependentSubregs.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/ResetMachineFunctionPass.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/SafeStack.cpp | 7
-rw-r--r-- llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 15
-rw-r--r-- llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 7
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 906
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 20
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 9
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 770
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 1388
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 458
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 65
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 35
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 9
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 1020
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 488
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 8
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 438
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 698
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 27
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 20
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 209
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 587
-rw-r--r-- llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/ShrinkWrap.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/SjLjEHPrepare.cpp | 9
-rw-r--r-- llvm/lib/CodeGen/SlotIndexes.cpp | 11
-rw-r--r-- llvm/lib/CodeGen/SpillPlacement.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/StackColoring.cpp | 3
-rw-r--r-- llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/StackMaps.cpp | 24
-rw-r--r-- llvm/lib/CodeGen/StackProtector.cpp | 5
-rw-r--r-- llvm/lib/CodeGen/StackSlotColoring.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 8
-rw-r--r-- llvm/lib/CodeGen/TailDuplication.cpp | 16
-rw-r--r-- llvm/lib/CodeGen/TailDuplicator.cpp | 17
-rw-r--r-- llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 13
-rw-r--r-- llvm/lib/CodeGen/TargetInstrInfo.cpp | 80
-rw-r--r-- llvm/lib/CodeGen/TargetLoweringBase.cpp | 84
-rw-r--r-- llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 61
-rw-r--r-- llvm/lib/CodeGen/TargetOptionsImpl.cpp | 14
-rw-r--r-- llvm/lib/CodeGen/TargetPassConfig.cpp | 11
-rw-r--r-- llvm/lib/CodeGen/TargetSubtargetInfo.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/TypePromotion.cpp | 1011
-rw-r--r-- llvm/lib/CodeGen/UnreachableBlockElim.cpp | 1
-rw-r--r-- llvm/lib/CodeGen/ValueTypes.cpp | 136
-rw-r--r-- llvm/lib/CodeGen/WasmEHPrepare.cpp | 2
-rw-r--r-- llvm/lib/CodeGen/WinEHPrepare.cpp | 4
-rw-r--r-- llvm/lib/CodeGen/XRayInstrumentation.cpp | 1
192 files changed, 10533 insertions, 5252 deletions
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index 4f24f077d120..1632895fe5fa 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -262,7 +262,7 @@ static bool isNoopBitcast(Type *T1, Type *T2,
/// Look through operations that will be free to find the earliest source of
/// this value.
///
-/// @param ValLoc If V has aggegate type, we will be interested in a particular
+/// @param ValLoc If V has aggregate type, we will be interested in a particular
/// scalar component. This records its address; the reverse of this list gives a
/// sequence of indices appropriate for an extractvalue to locate the important
/// value. This value is updated during the function and on exit will indicate
@@ -567,12 +567,16 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
AttributeList::ReturnIndex);
- // NoAlias and NonNull are completely benign as far as calling convention
+ // The following attributes are completely benign as far as calling convention
// goes, they shouldn't affect whether the call is a tail call.
CallerAttrs.removeAttribute(Attribute::NoAlias);
CalleeAttrs.removeAttribute(Attribute::NoAlias);
CallerAttrs.removeAttribute(Attribute::NonNull);
CalleeAttrs.removeAttribute(Attribute::NonNull);
+ CallerAttrs.removeAttribute(Attribute::Dereferenceable);
+ CalleeAttrs.removeAttribute(Attribute::Dereferenceable);
+ CallerAttrs.removeAttribute(Attribute::DereferenceableOrNull);
+ CalleeAttrs.removeAttribute(Attribute::DereferenceableOrNull);
if (CallerAttrs.contains(Attribute::ZExt)) {
if (!CalleeAttrs.contains(Attribute::ZExt))
@@ -611,6 +615,22 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
return CallerAttrs == CalleeAttrs;
}
+/// Check whether B is a bitcast of a pointer type to another pointer type,
+/// which is equal to A.
+static bool isPointerBitcastEqualTo(const Value *A, const Value *B) {
+ assert(A && B && "Expected non-null inputs!");
+
+ auto *BitCastIn = dyn_cast<BitCastInst>(B);
+
+ if (!BitCastIn)
+ return false;
+
+ if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
+ return false;
+
+ return A == BitCastIn->getOperand(0);
+}
+
bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
const Instruction *I,
const ReturnInst *Ret,
@@ -643,7 +663,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) ||
(IID == Intrinsic::memset &&
TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) &&
- RetVal == Call->getArgOperand(0))
+ (RetVal == Call->getArgOperand(0) ||
+ isPointerBitcastEqualTo(RetVal, Call->getArgOperand(0))))
return true;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 73c53d6c4af5..6f9aa4dd79fd 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -31,13 +31,16 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -52,6 +55,7 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -81,7 +85,6 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCCodePadder.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCDwarf.h"
@@ -139,18 +142,13 @@ static const char *const DbgTimerDescription = "Debug Info Emission";
static const char *const EHTimerName = "write_exception";
static const char *const EHTimerDescription = "DWARF Exception Writer";
static const char *const CFGuardName = "Control Flow Guard";
-static const char *const CFGuardDescription = "Control Flow Guard Tables";
+static const char *const CFGuardDescription = "Control Flow Guard";
static const char *const CodeViewLineTablesGroupName = "linetables";
static const char *const CodeViewLineTablesGroupDescription =
"CodeView Line Tables";
STATISTIC(EmittedInsts, "Number of machine instrs printed");
-static cl::opt<bool> EnableRemarksSection(
- "remarks-section",
- cl::desc("Emit a section containing remark diagnostics metadata"),
- cl::init(false));
-
char AsmPrinter::ID = 0;
using gcp_map_type = DenseMap<GCStrategy *, std::unique_ptr<GCMetadataPrinter>>;
@@ -253,6 +251,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
bool AsmPrinter::doInitialization(Module &M) {
@@ -381,12 +381,12 @@ bool AsmPrinter::doInitialization(Module &M) {
EHTimerDescription, DWARFGroupName,
DWARFGroupDescription);
+ // Emit tables for any value of the cfguard flag (i.e. cfguard=1 or cfguard=2).
if (mdconst::extract_or_null<ConstantInt>(
- MMI->getModule()->getModuleFlag("cfguardtable")))
+ MMI->getModule()->getModuleFlag("cfguard")))
Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName,
CFGuardDescription, DWARFGroupName,
DWARFGroupDescription);
-
return false;
}
@@ -879,6 +879,10 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << MI->getOperand(0).getImm();
} else if (MI->getOperand(0).isCImm()) {
MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
+ } else if (MI->getOperand(0).isTargetIndex()) {
+ auto Op = MI->getOperand(0);
+ OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")";
+ return true;
} else {
unsigned Reg;
if (MI->getOperand(0).isReg()) {
@@ -940,7 +944,7 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const {
MF->getFunction().needsUnwindTableEntry())
return CFI_M_EH;
- if (MMI->hasDebugInfo())
+ if (MMI->hasDebugInfo() || MF->getTarget().Options.ForceDwarfFrameSection)
return CFI_M_Debug;
return CFI_M_None;
@@ -1065,13 +1069,9 @@ void AsmPrinter::EmitFunctionBody() {
++NumInstsInFunction;
}
- // If there is a pre-instruction symbol, emit a label for it here. If the
- // instruction was duplicated and the label has already been emitted,
- // don't re-emit the same label.
- // FIXME: Consider strengthening that to an assertion.
+ // If there is a pre-instruction symbol, emit a label for it here.
if (MCSymbol *S = MI.getPreInstrSymbol())
- if (S->isUndefined())
- OutStreamer->EmitLabel(S);
+ OutStreamer->EmitLabel(S);
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
@@ -1124,13 +1124,9 @@ void AsmPrinter::EmitFunctionBody() {
break;
}
- // If there is a post-instruction symbol, emit a label for it here. If
- // the instruction was duplicated and the label has already been emitted,
- // don't re-emit the same label.
- // FIXME: Consider strengthening that to an assertion.
+ // If there is a post-instruction symbol, emit a label for it here.
if (MCSymbol *S = MI.getPostInstrSymbol())
- if (S->isUndefined())
- OutStreamer->EmitLabel(S);
+ OutStreamer->EmitLabel(S);
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
@@ -1226,6 +1222,8 @@ void AsmPrinter::EmitFunctionBody() {
// Emit section containing stack size metadata.
emitStackSizeSection(*MF);
+ emitPatchableFunctionEntries();
+
if (isVerbose())
OutStreamer->GetCommentOS() << "-- End function\n";
@@ -1365,14 +1363,14 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
}
}
-void AsmPrinter::emitRemarksSection(Module &M) {
- RemarkStreamer *RS = M.getContext().getRemarkStreamer();
- if (!RS)
+void AsmPrinter::emitRemarksSection(RemarkStreamer &RS) {
+ if (!RS.needsSection())
return;
- remarks::RemarkSerializer &RemarkSerializer = RS->getSerializer();
+
+ remarks::RemarkSerializer &RemarkSerializer = RS.getSerializer();
Optional<SmallString<128>> Filename;
- if (Optional<StringRef> FilenameRef = RS->getFilename()) {
+ if (Optional<StringRef> FilenameRef = RS.getFilename()) {
Filename = *FilenameRef;
sys::fs::make_absolute(*Filename);
assert(!Filename->empty() && "The filename can't be empty.");
@@ -1385,7 +1383,7 @@ void AsmPrinter::emitRemarksSection(Module &M) {
: RemarkSerializer.metaSerializer(OS);
MetaSerializer->emit();
- // Switch to the right section: .remarks/__remarks.
+ // Switch to the remarks section.
MCSection *RemarksSection =
OutContext.getObjectFileInfo()->getRemarksSection();
OutStreamer->SwitchSection(RemarksSection);
@@ -1427,8 +1425,8 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit the remarks section contents.
// FIXME: Figure out when is the safest time to emit this section. It should
// not come after debug info.
- if (EnableRemarksSection)
- emitRemarksSection(M);
+ if (RemarkStreamer *RS = M.getContext().getRemarkStreamer())
+ emitRemarksSection(*RS);
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
@@ -1503,8 +1501,6 @@ bool AsmPrinter::doFinalization(Module &M) {
}
}
- OutStreamer->AddBlankLine();
-
// Print aliases in topological order, that is, for each alias a = b,
// b must be printed before a.
// This is because on some targets (e.g. PowerPC) linker expects aliases in
@@ -1666,6 +1662,7 @@ MCSymbol *AsmPrinter::getCurExceptionSym() {
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
+ const Function &F = MF.getFunction();
// Get the function symbol.
if (MAI->needsFunctionDescriptors()) {
@@ -1678,7 +1675,6 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnSym =
OutContext.getOrCreateSymbol("." + CurrentFnDescSym->getName());
- const Function &F = MF.getFunction();
MCSectionXCOFF *FnEntryPointSec =
cast<MCSectionXCOFF>(getObjFileLowering().SectionForGlobal(&F, TM));
// Set the containing csect.
@@ -1691,7 +1687,8 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnBegin = nullptr;
CurExceptionSym = nullptr;
bool NeedsLocalForSize = MAI->needsLocalForSize();
- if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize ||
+ if (F.hasFnAttribute("patchable-function-entry") ||
+ needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize ||
MF.getTarget().Options.EmitStackSizeSection) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
@@ -1699,6 +1696,13 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
}
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ MBFI = (PSI && PSI->hasProfileSummary()) ?
+ // ORE conditionally computes MBFI. If available, use it, otherwise
+ // request it.
+ (ORE->getBFI() ? ORE->getBFI() :
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()) :
+ nullptr;
}
namespace {
@@ -1769,6 +1773,11 @@ void AsmPrinter::EmitConstantPool() {
if (!Sym->isUndefined())
continue;
+ if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
+ cast<MCSymbolXCOFF>(Sym)->setContainingCsect(
+ cast<MCSectionXCOFF>(CPSections[i].S));
+ }
+
if (CurSection != CPSections[i].S) {
OutStreamer->SwitchSection(CPSections[i].S);
EmitAlignment(Align(CPSections[i].Alignment));
@@ -1858,10 +1867,16 @@ void AsmPrinter::EmitJumpTableInfo() {
// second label is actually referenced by the code.
if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix())
// FIXME: This doesn't have to have any specific name, just any randomly
- // named and numbered 'l' label would work. Simplify GetJTISymbol.
+ // named and numbered local label starting with 'l' would work. Simplify
+ // GetJTISymbol.
OutStreamer->EmitLabel(GetJTISymbol(JTI, true));
- OutStreamer->EmitLabel(GetJTISymbol(JTI));
+ MCSymbol* JTISymbol = GetJTISymbol(JTI);
+ if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
+ cast<MCSymbolXCOFF>(JTISymbol)->setContainingCsect(
+ cast<MCSectionXCOFF>(TLOF.getSectionForJumpTable(F, TM)));
+ }
+ OutStreamer->EmitLabel(JTISymbol);
for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
@@ -2914,19 +2929,6 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
}
-void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
- MCCodePaddingContext &Context) const {
- assert(MF != nullptr && "Machine function must be valid");
- Context.IsPaddingActive = !MF->hasInlineAsm() &&
- !MF->getFunction().hasOptSize() &&
- TM.getOptLevel() != CodeGenOpt::None;
- Context.IsBasicBlockReachableViaFallthrough =
- std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) !=
- MBB.pred_end();
- Context.IsBasicBlockReachableViaBranch =
- MBB.pred_size() > 0 && !isBlockOnlyReachableByFallthrough(&MBB);
-}
-
/// EmitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
@@ -2943,9 +2945,6 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
const Align Alignment = MBB.getAlignment();
if (Alignment != Align::None())
EmitAlignment(Alignment);
- MCCodePaddingContext Context;
- setupCodePaddingContext(MBB, Context);
- OutStreamer->EmitCodePaddingBasicBlockStart(Context);
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
@@ -2993,11 +2992,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
}
}
-void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) {
- MCCodePaddingContext Context;
- setupCodePaddingContext(MBB, Context);
- OutStreamer->EmitCodePaddingBasicBlockEnd(Context);
-}
+void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) {}
void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
bool IsDefinition) const {
@@ -3202,6 +3197,41 @@ void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
AlwaysInstrument, &F, Version});
}
+void AsmPrinter::emitPatchableFunctionEntries() {
+ const Function &F = MF->getFunction();
+ if (!F.hasFnAttribute("patchable-function-entry"))
+ return;
+ const unsigned PointerSize = getPointerSize();
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC;
+
+ // As of binutils 2.33, GNU as does not support section flag "o" or linkage
+ // field "unique". Use SHF_LINK_ORDER if we are using the integrated
+ // assembler.
+ if (MAI->useIntegratedAssembler()) {
+ Flags |= ELF::SHF_LINK_ORDER;
+ std::string GroupName;
+ if (F.hasComdat()) {
+ Flags |= ELF::SHF_GROUP;
+ GroupName = F.getComdat()->getName();
+ }
+ MCSection *Section = getObjFileLowering().SectionForGlobal(&F, TM);
+ unsigned UniqueID =
+ PatchableFunctionEntryID
+ .try_emplace(Section, PatchableFunctionEntryID.size())
+ .first->second;
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ "__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0,
+ GroupName, UniqueID, cast<MCSymbolELF>(CurrentFnSym)));
+ } else {
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ "__patchable_function_entries", ELF::SHT_PROGBITS, Flags));
+ }
+ EmitAlignment(Align(PointerSize));
+ OutStreamer->EmitSymbolValue(CurrentFnBegin, PointerSize);
+ }
+}
+
uint16_t AsmPrinter::getDwarfVersion() const {
return OutStreamer->getContext().getDwarfVersion();
}
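The new emitPatchableFunctionEntries() hook consumes the "patchable-function-entry" function attribute. A hedged usage sketch (assumes Clang 10+, which exposes the attribute, and an ELF target; the exact NOP sled is target-specific):

```cpp
// Asks the backend for 2 NOPs at the function entry (0 of them before the
// entry symbol) and for the entry address to be recorded in a
// __patchable_function_entries section — emitted with SHF_LINK_ORDER when
// the integrated assembler is used, per the patch above.
__attribute__((patchable_function_entry(2, 0)))
void traced_entry() {}
```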
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 420df26a2b8b..c631cc5360b8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -207,11 +207,17 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
}
if (Done) break;
+ bool HasCurlyBraces = false;
+ if (*LastEmitted == '{') { // ${variable}
+ ++LastEmitted; // Consume '{' character.
+ HasCurlyBraces = true;
+ }
+
// If we have ${:foo}, then this is not a real operand reference, it is a
// "magic" string reference, just like in .td files. Arrange to call
// PrintSpecial.
- if (LastEmitted[0] == '{' && LastEmitted[1] == ':') {
- LastEmitted += 2;
+ if (HasCurlyBraces && LastEmitted[0] == ':') {
+ ++LastEmitted;
const char *StrStart = LastEmitted;
const char *StrEnd = strchr(StrStart, '}');
if (!StrEnd)
@@ -238,6 +244,27 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
report_fatal_error("Invalid $ operand number in inline asm string: '" +
Twine(AsmStr) + "'");
+ char Modifier[2] = { 0, 0 };
+
+ if (HasCurlyBraces) {
+ // If we have curly braces, check for a modifier character. This
+ // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+ if (*LastEmitted == ':') {
+ ++LastEmitted; // Consume ':' character.
+ if (*LastEmitted == 0)
+ report_fatal_error("Bad ${:} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
+ Modifier[0] = *LastEmitted;
+ ++LastEmitted; // Consume modifier character.
+ }
+
+ if (*LastEmitted != '}')
+ report_fatal_error("Bad ${} expression in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ ++LastEmitted; // Consume '}' character.
+ }
+
// Okay, we finally have a value number. Ask the target to print this
// operand!
unsigned OpNo = InlineAsm::MIOp_FirstOperand;
@@ -262,9 +289,11 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
++OpNo; // Skip over the ID number.
if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(MI, OpNo, /*Modifier*/ nullptr, OS);
+ Error = AP->PrintAsmMemoryOperand(
+ MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
} else {
- Error = AP->PrintAsmOperand(MI, OpNo, /*Modifier*/ nullptr, OS);
+ Error = AP->PrintAsmOperand(MI, OpNo,
+ Modifier[0] ? Modifier : nullptr, OS);
}
}
if (Error) {
@@ -427,26 +456,23 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// FIXME: Shouldn't arch-independent output template handling go into
// PrintAsmOperand?
- if (Modifier[0] == 'l') { // Labels are target independent.
- if (MI->getOperand(OpNo).isBlockAddress()) {
- const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
- MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
- Sym->print(OS, AP->MAI);
- MMI->getContext().registerInlineAsmLabel(Sym);
- } else if (MI->getOperand(OpNo).isMBB()) {
- const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
- Sym->print(OS, AP->MAI);
- } else {
- Error = true;
- }
+ // Labels are target independent.
+ if (MI->getOperand(OpNo).isBlockAddress()) {
+ const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
+ MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
+ Sym->print(OS, AP->MAI);
+ MMI->getContext().registerInlineAsmLabel(Sym);
+ } else if (MI->getOperand(OpNo).isMBB()) {
+ const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
+ Sym->print(OS, AP->MAI);
+ } else if (Modifier[0] == 'l') {
+ Error = true;
+ } else if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(
+ MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
} else {
- if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(
- MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
- } else {
- Error = AP->PrintAsmOperand(MI, OpNo,
- Modifier[0] ? Modifier : nullptr, OS);
- }
+ Error = AP->PrintAsmOperand(MI, OpNo,
+ Modifier[0] ? Modifier : nullptr, OS);
}
}
if (Error) {
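EmitMSInlineAsmStr now understands an optional modifier inside braces, e.g. ${0:u} selects operand 0 with modifier 'u'. A minimal standalone sketch of the accepted ${operand:modifier} grammar (hypothetical helper names, not AsmPrinter code):

```cpp
#include <cctype>
#include <cstdio>
#include <stdexcept>

struct OperandRef { unsigned OpNo; char Modifier; /* 0 = none */ };

OperandRef parseDollarExpr(const char *&P) {
  bool Braced = (*P == '{');
  if (Braced) ++P;                        // consume '{'
  if (!isdigit(static_cast<unsigned char>(*P)))
    throw std::runtime_error("expected operand number");
  unsigned OpNo = 0;
  while (isdigit(static_cast<unsigned char>(*P)))
    OpNo = OpNo * 10 + unsigned(*P++ - '0');
  char Mod = 0;
  if (Braced && *P == ':') {              // optional ":modifier"
    ++P;
    if (!*P) throw std::runtime_error("bad ${:} expression");
    Mod = *P++;
  }
  if (Braced) {
    if (*P != '}') throw std::runtime_error("bad ${} expression");
    ++P;                                  // consume '}'
  }
  return {OpNo, Mod};
}

int main() {
  const char *S = "{0:u}";                // the leading '$' already consumed
  OperandRef R = parseDollarExpr(S);
  std::printf("op %u, modifier '%c'\n", R.OpNo, R.Modifier); // op 0, 'u'
}
```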
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index c6457f3626d1..62ad356e7f8f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1100,14 +1100,8 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
}
for (auto HeapAllocSite : FI.HeapAllocSites) {
- MCSymbol *BeginLabel = std::get<0>(HeapAllocSite);
- MCSymbol *EndLabel = std::get<1>(HeapAllocSite);
-
- // The labels might not be defined if the instruction was replaced
- // somewhere in the codegen pipeline.
- if (!BeginLabel->isDefined() || !EndLabel->isDefined())
- continue;
-
+ const MCSymbol *BeginLabel = std::get<0>(HeapAllocSite);
+ const MCSymbol *EndLabel = std::get<1>(HeapAllocSite);
const DIType *DITy = std::get<2>(HeapAllocSite);
MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE);
OS.AddComment("Call site offset");
@@ -1427,6 +1421,16 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
maybeRecordLocation(FnStartDL, MF);
}
+
+ // Find heap alloc sites and emit labels around them.
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (MI.getHeapAllocMarker()) {
+ requestLabelBeforeInsn(&MI);
+ requestLabelAfterInsn(&MI);
+ }
+ }
+ }
}
static bool shouldEmitUdt(const DIType *T) {
@@ -2850,8 +2854,18 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
return;
}
+ // Find heap alloc sites and add to list.
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (MDNode *MD = MI.getHeapAllocMarker()) {
+ CurFn->HeapAllocSites.push_back(std::make_tuple(getLabelBeforeInsn(&MI),
+ getLabelAfterInsn(&MI),
+ dyn_cast<DIType>(MD)));
+ }
+ }
+ }
+
CurFn->Annotations = MF->getCodeViewAnnotations();
- CurFn->HeapAllocSites = MF->getCodeViewHeapAllocSites();
CurFn->End = Asm->getFunctionEnd();
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 7ffd77926cf7..b56b9047e1a9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -148,7 +148,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<LexicalBlock *, 1> ChildBlocks;
std::vector<std::pair<MCSymbol *, MDNode *>> Annotations;
- std::vector<std::tuple<MCSymbol *, MCSymbol *, const DIType *>>
+ std::vector<std::tuple<const MCSymbol *, const MCSymbol *, const DIType *>>
HeapAllocSites;
const MCSymbol *Begin = nullptr;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index f4134da48caa..84b86a71fa5f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -319,8 +319,10 @@ DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag)
{
Die.Owner = this;
assert((UnitTag == dwarf::DW_TAG_compile_unit ||
+ UnitTag == dwarf::DW_TAG_skeleton_unit ||
UnitTag == dwarf::DW_TAG_type_unit ||
- UnitTag == dwarf::DW_TAG_partial_unit) && "expected a unit TAG");
+ UnitTag == dwarf::DW_TAG_partial_unit) &&
+ "expected a unit TAG");
}
void DIEValue::EmitValue(const AsmPrinter *AP) const {
@@ -798,6 +800,8 @@ void DIEBlock::print(raw_ostream &O) const {
//===----------------------------------------------------------------------===//
unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_loclistx)
+ return getULEB128Size(Index);
if (Form == dwarf::DW_FORM_data4)
return 4;
if (Form == dwarf::DW_FORM_sec_offset)
@@ -808,6 +812,10 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
/// EmitValue - Emit label value.
///
void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_loclistx) {
+ AP->EmitULEB128(Index);
+ return;
+ }
DwarfDebug *DD = AP->getDwarfDebug();
MCSymbol *Label = DD->getDebugLocs().getList(Index).Label;
AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf());
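DW_FORM_loclistx encodes the location-list index as a ULEB128, so its size is variable rather than a fixed 4 bytes. A self-contained sketch of the size computation that getULEB128Size(Index) performs:

```cpp
#include <cassert>
#include <cstdint>

unsigned ulebSize(uint64_t Value) {
  unsigned Size = 0;
  do {
    Value >>= 7;   // each byte carries 7 payload bits
    ++Size;        // plus one continuation bit
  } while (Value);
  return Size;
}

int main() {
  assert(ulebSize(0) == 1);
  assert(ulebSize(127) == 1);    // fits in one 7-bit group
  assert(ulebSize(128) == 2);    // needs a continuation byte
  assert(ulebSize(16384) == 3);  // 2^14 spills into a third byte
}
```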
diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 7f9d6c618ad3..170fc8b6d49f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -262,7 +262,9 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
DbgLabels.addInstr(L, MI);
}
- if (MI.isDebugInstr())
+ // Meta instructions have no output and do not change any values, so they
+ // can be safely ignored.
+ if (MI.isMetaInstruction())
continue;
// Not a DBG_VALUE instruction. It may clobber registers which describe
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 17e39b3d3268..36278f2e9e2d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -20,13 +20,33 @@
namespace llvm {
class AsmPrinter;
+/// This struct describes a target-specific location.
+struct TargetIndexLocation {
+ int Index;
+ int Offset;
+
+ TargetIndexLocation() = default;
+ TargetIndexLocation(unsigned Idx, int64_t Offset)
+ : Index(Idx), Offset(Offset) {}
+
+ bool operator==(const TargetIndexLocation &Other) const {
+ return Index == Other.Index && Offset == Other.Offset;
+ }
+};
+
/// A single location or constant.
class DbgValueLoc {
/// Any complex address location expression for this DbgValueLoc.
const DIExpression *Expression;
/// Type of entry that this represents.
- enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
+ enum EntryType {
+ E_Location,
+ E_Integer,
+ E_ConstantFP,
+ E_ConstantInt,
+ E_TargetIndexLocation
+ };
enum EntryType EntryKind;
/// Either a constant,
@@ -36,8 +56,12 @@ class DbgValueLoc {
const ConstantInt *CIP;
} Constant;
- /// Or a location in the machine frame.
- MachineLocation Loc;
+ union {
+ /// Or a location in the machine frame.
+ MachineLocation Loc;
+ /// Or a target-specific location.
+ TargetIndexLocation TIL;
+ };
public:
DbgValueLoc(const DIExpression *Expr, int64_t i)
@@ -56,8 +80,13 @@ public:
: Expression(Expr), EntryKind(E_Location), Loc(Loc) {
assert(cast<DIExpression>(Expr)->isValid());
}
+ DbgValueLoc(const DIExpression *Expr, TargetIndexLocation Loc)
+ : Expression(Expr), EntryKind(E_TargetIndexLocation), TIL(Loc) {}
bool isLocation() const { return EntryKind == E_Location; }
+ bool isTargetIndexLocation() const {
+ return EntryKind == E_TargetIndexLocation;
+ }
bool isInt() const { return EntryKind == E_Integer; }
bool isConstantFP() const { return EntryKind == E_ConstantFP; }
bool isConstantInt() const { return EntryKind == E_ConstantInt; }
@@ -65,6 +94,7 @@ public:
const ConstantFP *getConstantFP() const { return Constant.CFP; }
const ConstantInt *getConstantInt() const { return Constant.CIP; }
MachineLocation getLoc() const { return Loc; }
+ TargetIndexLocation getTargetIndexLocation() const { return TIL; }
bool isFragment() const { return getExpression()->isFragment(); }
bool isEntryVal() const { return getExpression()->isEntryValue(); }
const DIExpression *getExpression() const { return Expression; }
@@ -162,6 +192,8 @@ inline bool operator==(const DbgValueLoc &A,
switch (A.EntryKind) {
case DbgValueLoc::E_Location:
return A.Loc == B.Loc;
+ case DbgValueLoc::E_TargetIndexLocation:
+ return A.TIL == B.TIL;
case DbgValueLoc::E_Integer:
return A.Constant.Int == B.Constant.Int;
case DbgValueLoc::E_ConstantFP:
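DbgValueLoc now overlays MachineLocation and TargetIndexLocation in an anonymous union discriminated by EntryKind, and operator== compares only the member selected by the kind. A self-contained sketch of the pattern (stand-in types, not the LLVM classes):

```cpp
#include <cassert>

struct MachineLoc { unsigned Reg; bool Indirect; };
struct TargetIndexLoc { int Index; int Offset; };

class Loc {
  enum Kind { K_Machine, K_TargetIndex } K;
  union {                // only the member selected by K is alive
    MachineLoc ML;
    TargetIndexLoc TIL;
  };
public:
  Loc(MachineLoc M) : K(K_Machine), ML(M) {}
  Loc(TargetIndexLoc T) : K(K_TargetIndex), TIL(T) {}
  friend bool operator==(const Loc &A, const Loc &B) {
    if (A.K != B.K)      // never compare across kinds
      return false;
    if (A.K == K_Machine)
      return A.ML.Reg == B.ML.Reg && A.ML.Indirect == B.ML.Indirect;
    return A.TIL.Index == B.TIL.Index && A.TIL.Offset == B.TIL.Offset;
  }
};

int main() {
  assert(Loc(TargetIndexLoc{1, 16}) == Loc(TargetIndexLoc{1, 16}));
  assert(!(Loc(MachineLoc{5, false}) == Loc(TargetIndexLoc{1, 16})));
}
```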
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
index f483d532ff07..8c6109880afc 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
@@ -42,4 +42,6 @@ DebugLocStream::ListBuilder::~ListBuilder() {
return;
V.initializeDbgValue(&MI);
V.setDebugLocListIndex(ListIndex);
+ if (TagOffset)
+ V.setDebugLocListTagOffset(*TagOffset);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 0db86b09d19a..10019a4720e6 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -159,11 +159,17 @@ class DebugLocStream::ListBuilder {
DbgVariable &V;
const MachineInstr &MI;
size_t ListIndex;
+ Optional<uint8_t> TagOffset;
public:
ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm,
DbgVariable &V, const MachineInstr &MI)
- : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)) {}
+ : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)),
+ TagOffset(None) {}
+
+ void setTagOffset(uint8_t TO) {
+ TagOffset = TO;
+ }
/// Finalize the list.
///
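The ListBuilder change is a small RAII pattern: an optional tag offset is recorded while the list is built and flushed to the variable only when the builder is destroyed (see the matching DebugLocStream.cpp hunk above). A sketch using std::optional as a stand-in for llvm::Optional:

```cpp
#include <cstdint>
#include <optional>

struct Variable {
  std::optional<uint8_t> LocListTagOffset;
  void setDebugLocListTagOffset(uint8_t TO) { LocListTagOffset = TO; }
};

class ListBuilder {
  Variable &V;
  std::optional<uint8_t> TagOffset;  // empty until finalize sees one
public:
  explicit ListBuilder(Variable &V) : V(V) {}
  void setTagOffset(uint8_t TO) { TagOffset = TO; }
  ~ListBuilder() {
    if (TagOffset)                   // propagate only if it was set
      V.setDebugLocListTagOffset(*TagOffset);
  }
};

int main() {
  Variable V;
  {
    ListBuilder B(V);
    B.setTagOffset(3);
  } // destructor flushes the offset here
  return V.LocListTagOffset.value_or(0) == 3 ? 0 : 1;
}
```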
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 207a7284dafa..facbf22946e4 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -133,6 +134,8 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
if (!hasEmittedCFISections) {
if (Asm->needsOnlyDebugCFIMoves())
Asm->OutStreamer->EmitCFISections(false, true);
+ else if (Asm->TM.Options.ForceDwarfFrameSection)
+ Asm->OutStreamer->EmitCFISections(true, true);
hasEmittedCFISections = true;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index a61c98ec1c18..38011102c7b3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -52,10 +52,23 @@
using namespace llvm;
+static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) {
+
+ // According to DWARF Debugging Information Format Version 5,
+ // 3.1.2 Skeleton Compilation Unit Entries:
+ // "When generating a split DWARF object file (see Section 7.3.2
+ // on page 187), the compilation unit in the .debug_info section
+ // is a "skeleton" compilation unit with the tag DW_TAG_skeleton_unit"
+ if (DW->getDwarfVersion() >= 5 && Kind == UnitKind::Skeleton)
+ return dwarf::DW_TAG_skeleton_unit;
+
+ return dwarf::DW_TAG_compile_unit;
+}
+
DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW,
- DwarfFile *DWU)
- : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID) {
+ DwarfFile *DWU, UnitKind Kind)
+ : DwarfUnit(GetCompileUnitType(Kind, DW), Node, A, DW, DWU), UniqueID(UID) {
insertDIE(Node, &getUnitDie());
MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin");
}
@@ -65,10 +78,6 @@ DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node,
void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label) {
// Don't use the address pool in non-fission or in the skeleton unit itself.
- // FIXME: Once GDB supports this, it's probably worthwhile using the address
- // pool from the skeleton - maybe even in non-fission (possibly fewer
- // relocations by sharing them in the pool, but we have other ideas about how
- // to reduce the number of relocations as well/instead).
if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5)
return addLocalLabelAddress(Die, Attribute, Label);
@@ -490,10 +499,10 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
const MCSymbol *RangeSectionSym =
TLOF.getDwarfRangesSection()->getBeginSymbol();
if (isDwoUnit())
- addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.Label,
RangeSectionSym);
else
- addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.Label,
RangeSectionSym);
}
}
@@ -602,6 +611,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
unsigned Offset = DV.getDebugLocListIndex();
if (Offset != ~0U) {
addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
+ auto TagOffset = DV.getDebugLocListTagOffset();
+ if (TagOffset)
+ addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *TagOffset);
return VariableDie;
}
@@ -619,6 +632,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
DwarfExpr.addUnsignedConstant(DVal->getInt());
DwarfExpr.addExpression(Expr);
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ if (DwarfExpr.TagOffset)
+ addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset,
+ dwarf::DW_FORM_data1, *DwarfExpr.TagOffset);
+
} else
addConstantValue(*VariableDie, DVal->getInt(), DV.getType());
} else if (DVal->isConstantFP()) {
@@ -951,8 +968,8 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(
addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
MachineLocation(CallReg));
} else {
- DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
- assert(CalleeDIE && "Could not create DIE for call site entry origin");
+ DIE *CalleeDIE = getDIE(CalleeSP);
+ assert(CalleeDIE && "Could not find DIE for call site entry origin");
addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
*CalleeDIE);
}
@@ -1185,6 +1202,10 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
// Now attach the location information to the DIE.
addBlock(Die, Attribute, DwarfExpr.finalize());
+
+ if (DwarfExpr.TagOffset)
+ addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *DwarfExpr.TagOffset);
}
/// Start with the address based on the location provided, and generate the
@@ -1215,13 +1236,20 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
// Now attach the location information to the DIE.
addBlock(Die, Attribute, DwarfExpr.finalize());
+
+ if (DwarfExpr.TagOffset)
+ addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *DwarfExpr.TagOffset);
}
/// Add a Dwarf loclistptr attribute data and value.
void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
unsigned Index) {
- dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
- : dwarf::DW_FORM_data4;
+ dwarf::Form Form = dwarf::DW_FORM_data4;
+ if (DD->getDwarfVersion() == 4)
+ Form = dwarf::DW_FORM_sec_offset;
+ if (DD->getDwarfVersion() >= 5)
+ Form = dwarf::DW_FORM_loclistx;
Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index));
}
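addLocationList() now picks the loclistptr form by DWARF version: a plain data4 offset before DWARF 4, sec_offset for DWARF 4, and an index form (DW_FORM_loclistx) for DWARF 5, matching the offset table DebugLocStream emits. A self-contained sketch of the selection (enumerator names mirror the llvm::dwarf constants):

```cpp
#include <cassert>

enum Form { DW_FORM_data4, DW_FORM_sec_offset, DW_FORM_loclistx };

Form locationListForm(unsigned DwarfVersion) {
  Form F = DW_FORM_data4;          // DWARF 2/3 fallback
  if (DwarfVersion == 4)
    F = DW_FORM_sec_offset;        // section-relative offset
  if (DwarfVersion >= 5)
    F = DW_FORM_loclistx;          // ULEB128 index into the offset table
  return F;
}

int main() {
  assert(locationListForm(3) == DW_FORM_data4);
  assert(locationListForm(4) == DW_FORM_sec_offset);
  assert(locationListForm(5) == DW_FORM_loclistx);
}
```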
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 1b7ea2673ac0..8491d078ed89 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -40,6 +40,8 @@ class MCExpr;
class MCSymbol;
class MDNode;
+enum class UnitKind { Skeleton, Full };
+
class DwarfCompileUnit final : public DwarfUnit {
/// A numeric ID unique among all CUs in the module
unsigned UniqueID;
@@ -104,7 +106,8 @@ class DwarfCompileUnit final : public DwarfUnit {
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
- DwarfDebug *DW, DwarfFile *DWU);
+ DwarfDebug *DW, DwarfFile *DWU,
+ UnitKind Kind = UnitKind::Full);
bool hasRangeLists() const { return HasRangeLists; }
unsigned getUniqueID() const { return UniqueID; }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index c505e77e5acd..fa6800de7955 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -241,6 +241,11 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
MachineLocation MLoc(RegOp.getReg(), Op1.isImm());
return DbgValueLoc(Expr, MLoc);
}
+ if (MI->getOperand(0).isTargetIndex()) {
+ auto Op = MI->getOperand(0);
+ return DbgValueLoc(Expr,
+ TargetIndexLocation(Op.getIndex(), Op.getOffset()));
+ }
if (MI->getOperand(0).isImm())
return DbgValueLoc(Expr, MI->getOperand(0).getImm());
if (MI->getOperand(0).isFPImm())
@@ -535,6 +540,14 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
}
}
+DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) {
+ DICompileUnit *Unit = SP->getUnit();
+ assert(SP->isDefinition() && "Subprogram not a definition");
+ assert(Unit && "Subprogram definition without parent unit");
+ auto &CU = getOrCreateDwarfCompileUnit(Unit);
+ return *CU.getOrCreateSubprogramDIE(SP);
+}
+
/// Try to interpret values loaded into registers that forward parameters
/// for \p CallMI. Store parameters with interpreted value into \p Params.
static void collectCallSiteParameters(const MachineInstr *CallMI,
@@ -595,7 +608,6 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
Implicit.push_back(FwdReg);
else
Explicit.push_back(FwdReg);
- break;
}
}
}
@@ -615,8 +627,12 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
++NumCSParams;
};
- // Search for a loading value in forwaring registers.
+ // Search for a loading value in forwarding registers.
for (; I != MBB->rend(); ++I) {
+ // Skip bundle headers.
+ if (I->isBundle())
+ continue;
+
// If the next instruction is a call we can not interpret parameter's
// forwarding registers or we finished the interpretation of all parameters.
if (I->isCall())
@@ -636,32 +652,33 @@ static void collectCallSiteParameters(const MachineInstr *CallMI,
for (auto Reg : concat<unsigned>(ExplicitFwdRegDefs, ImplicitFwdRegDefs))
ForwardedRegWorklist.erase(Reg);
- // The describeLoadedValue() hook currently does not have any information
- // about which register it should describe in case of multiple defines, so
- // for now we only handle instructions where a forwarded register is (at
- // least partially) defined by the instruction's single explicit define.
- if (I->getNumExplicitDefs() != 1 || ExplicitFwdRegDefs.empty())
- continue;
- unsigned Reg = ExplicitFwdRegDefs[0];
-
- if (auto ParamValue = TII->describeLoadedValue(*I)) {
- if (ParamValue->first.isImm()) {
- int64_t Val = ParamValue->first.getImm();
- DbgValueLoc DbgLocVal(ParamValue->second, Val);
- finishCallSiteParam(DbgLocVal, Reg);
- } else if (ParamValue->first.isReg()) {
- Register RegLoc = ParamValue->first.getReg();
- unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
- Register FP = TRI->getFrameRegister(*MF);
- bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
- if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
- DbgValueLoc DbgLocVal(ParamValue->second,
- MachineLocation(RegLoc,
- /*IsIndirect=*/IsSPorFP));
- finishCallSiteParam(DbgLocVal, Reg);
- } else if (ShouldTryEmitEntryVals) {
- ForwardedRegWorklist.insert(RegLoc);
- RegsForEntryValues[RegLoc] = Reg;
+ for (auto ParamFwdReg : ExplicitFwdRegDefs) {
+ if (auto ParamValue = TII->describeLoadedValue(*I, ParamFwdReg)) {
+ if (ParamValue->first.isImm()) {
+ int64_t Val = ParamValue->first.getImm();
+ DbgValueLoc DbgLocVal(ParamValue->second, Val);
+ finishCallSiteParam(DbgLocVal, ParamFwdReg);
+ } else if (ParamValue->first.isReg()) {
+ Register RegLoc = ParamValue->first.getReg();
+ // TODO: For now, there is no use in describing the value loaded into the
+ // register that is also the source register (e.g. $r0 = add $r0, x).
+ if (ParamFwdReg == RegLoc)
+ continue;
+
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register FP = TRI->getFrameRegister(*MF);
+ bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
+ if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
+ DbgValueLoc DbgLocVal(ParamValue->second,
+ MachineLocation(RegLoc,
+ /*IsIndirect=*/IsSPorFP));
+ finishCallSiteParam(DbgLocVal, ParamFwdReg);
+ // TODO: Add support for entry value plus an expression.
+ } else if (ShouldTryEmitEntryVals &&
+ ParamValue->second->getNumElements() == 0) {
+ ForwardedRegWorklist.insert(RegLoc);
+ RegsForEntryValues[RegLoc] = ParamFwdReg;
+ }
}
}
}
@@ -707,6 +724,12 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
// Emit call site entries for each call or tail call in the function.
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB.instrs()) {
+ // Bundles with a call in them will pass the isCall() test below but do
+ // not have callee operand information, so skip them here. The iterator
+ // will eventually reach the call MI.
+ if (MI.isBundle())
+ continue;
+
// Skip instructions which aren't calls. Both calls and tail-calling jump
// instructions (e.g TAILJMPd64) are classified correctly here.
if (!MI.isCall())
@@ -735,25 +758,45 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
if (!CalleeDecl || !CalleeDecl->getSubprogram())
continue;
CalleeSP = CalleeDecl->getSubprogram();
+
+ if (CalleeSP->isDefinition()) {
+ // Ensure that a subprogram DIE for the callee is available in the
+ // appropriate CU.
+ constructSubprogramDefinitionDIE(CalleeSP);
+ } else {
+ // Create the declaration DIE if it is missing. This is required to
+ // support compilation of old bitcode with an incomplete list of
+ // retained metadata.
+ CU.getOrCreateSubprogramDIE(CalleeSP);
+ }
}
// TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
bool IsTail = TII->isTailCall(MI);
+ // If MI is in a bundle, the label was created after the bundle since
+ // EmitFunctionBody iterates over top-level MIs. Get that top-level MI
+ // to search for that label below.
+ const MachineInstr *TopLevelCallMI =
+ MI.isInsideBundle() ? &*getBundleStart(MI.getIterator()) : &MI;
+
// For tail calls, for non-gdb tuning, no return PC information is needed.
// For regular calls (and tail calls in GDB tuning), the return PC
// is needed to disambiguate paths in the call graph which could lead to
// some target function.
const MCExpr *PCOffset =
- (IsTail && !tuneForGDB()) ? nullptr
- : getFunctionLocalOffsetAfterInsn(&MI);
+ (IsTail && !tuneForGDB())
+ ? nullptr
+ : getFunctionLocalOffsetAfterInsn(TopLevelCallMI);
- // Address of a call-like instruction for a normal call or a jump-like
- // instruction for a tail call. This is needed for GDB + DWARF 4 tuning.
+ // Return address of a call-like instruction for a normal call or a
+ // jump-like instruction for a tail call. This is needed for
+ // GDB + DWARF 4 tuning.
const MCSymbol *PCAddr =
- ApplyGNUExtensions ? const_cast<MCSymbol*>(getLabelAfterInsn(&MI))
- : nullptr;
+ ApplyGNUExtensions
+ ? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI))
+ : nullptr;
assert((IsTail || PCOffset || PCAddr) &&
"Call without return PC information");
@@ -837,10 +880,13 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
// This CU is either a clang module DWO or a skeleton CU.
NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
DIUnit->getDWOId());
- if (!DIUnit->getSplitDebugFilename().empty())
+ if (!DIUnit->getSplitDebugFilename().empty()) {
// This is a prefabricated skeleton CU.
- NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
- DIUnit->getSplitDebugFilename());
+ dwarf::Attribute attrDWOName = getDwarfVersion() >= 5
+ ? dwarf::DW_AT_dwo_name
+ : dwarf::DW_AT_GNU_dwo_name;
+ NewCU.addString(Die, attrDWOName, DIUnit->getSplitDebugFilename());
+ }
}
}
// Create new DwarfCompileUnit for the given metadata node with tag
@@ -878,11 +924,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
}
- // Create DIEs for function declarations used for call site debug info.
- for (auto Scope : DIUnit->getRetainedTypes())
- if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope))
- NewCU.getOrCreateSubprogramDIE(SP);
-
CUMap.insert({DIUnit, &NewCU});
CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
return NewCU;
@@ -974,6 +1015,7 @@ void DwarfDebug::beginModule() {
// Create the symbol that points to the first entry following the debug
// address table (.debug_addr) header.
AddrPool.setLabel(Asm->createTempSymbol("addr_table_base"));
+ DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
for (DICompileUnit *CUNode : M->debug_compile_units()) {
// FIXME: Move local imported entities into a list attached to the
@@ -1077,11 +1119,17 @@ void DwarfDebug::finalizeModuleInfo() {
// If we're splitting the dwarf out now that we've got the entire
// CU then add the dwo id to it.
auto *SkCU = TheCU.getSkeleton();
- if (useSplitDwarf() && !TheCU.getUnitDie().children().empty()) {
+
+ bool HasSplitUnit = SkCU && !TheCU.getUnitDie().children().empty();
+
+ if (HasSplitUnit) {
+ dwarf::Attribute attrDWOName = getDwarfVersion() >= 5
+ ? dwarf::DW_AT_dwo_name
+ : dwarf::DW_AT_GNU_dwo_name;
finishUnitAttributes(TheCU.getCUNode(), TheCU);
- TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name,
+ TheCU.addString(TheCU.getUnitDie(), attrDWOName,
Asm->TM.Options.MCOptions.SplitDwarfFile);
- SkCU->addString(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_name,
+ SkCU->addString(SkCU->getUnitDie(), attrDWOName,
Asm->TM.Options.MCOptions.SplitDwarfFile);
// Emit a unique identifier for this CU.
uint64_t ID =
@@ -1127,29 +1175,34 @@ void DwarfDebug::finalizeModuleInfo() {
// We don't keep track of which addresses are used in which CU so this
// is a bit pessimistic under LTO.
- if (!AddrPool.isEmpty() &&
- (getDwarfVersion() >= 5 ||
- (SkCU && !TheCU.getUnitDie().children().empty())))
+ if ((!AddrPool.isEmpty() || TheCU.hasRangeLists()) &&
+ (getDwarfVersion() >= 5 || HasSplitUnit))
U.addAddrTableBase();
if (getDwarfVersion() >= 5) {
if (U.hasRangeLists())
U.addRnglistsBase();
- if (!DebugLocs.getLists().empty() && !useSplitDwarf()) {
- DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
- U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
- DebugLocs.getSym(),
- TLOF.getDwarfLoclistsSection()->getBeginSymbol());
+ if (!DebugLocs.getLists().empty()) {
+ if (!useSplitDwarf())
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
+ DebugLocs.getSym(),
+ TLOF.getDwarfLoclistsSection()->getBeginSymbol());
}
}
auto *CUNode = cast<DICompileUnit>(P.first);
// If compile Unit has macros, emit "DW_AT_macro_info" attribute.
- if (CUNode->getMacros())
- U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
- U.getMacroLabelBegin(),
- TLOF.getDwarfMacinfoSection()->getBeginSymbol());
+ if (CUNode->getMacros()) {
+ if (useSplitDwarf())
+ TheCU.addSectionDelta(TheCU.getUnitDie(), dwarf::DW_AT_macro_info,
+ U.getMacroLabelBegin(),
+ TLOF.getDwarfMacinfoDWOSection()->getBeginSymbol());
+ else
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info,
+ U.getMacroLabelBegin(),
+ TLOF.getDwarfMacinfoSection()->getBeginSymbol());
+ }
}
// Emit all frontend-produced Skeleton CUs, i.e., Clang modules.
@@ -1185,9 +1238,10 @@ void DwarfDebug::endModule() {
emitDebugStr();
if (useSplitDwarf())
+ // Emit debug_loc.dwo/debug_loclists.dwo section.
emitDebugLocDWO();
else
- // Emit info into a debug loc section.
+ // Emit debug_loc/debug_loclists section.
emitDebugLoc();
// Corresponding abbreviations into a abbrev section.
@@ -1203,8 +1257,12 @@ void DwarfDebug::endModule() {
// Emit info into a debug ranges section.
emitDebugRanges();
+ if (useSplitDwarf())
+ // Emit info into a debug macinfo.dwo section.
+ emitDebugMacinfoDWO();
+ else
// Emit info into a debug macinfo section.
- emitDebugMacinfo();
+ emitDebugMacinfo();
if (useSplitDwarf()) {
emitDebugStrDWO();
@@ -2208,6 +2266,11 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
return DwarfExpr.addExpression(std::move(Cursor));
+ } else if (Value.isTargetIndexLocation()) {
+ TargetIndexLocation Loc = Value.getTargetIndexLocation();
+ // TODO: TargetIndexLocation is target-independent. Currently only the
+ // WebAssembly-specific encoding is supported.
+ DwarfExpr.addWasmLocation(Loc.Index, Loc.Offset);
} else if (Value.isConstantFP()) {
APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
DwarfExpr.addUnsignedConstant(RawBytes);
@@ -2242,6 +2305,8 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr);
}
DwarfExpr.finalize();
+ if (DwarfExpr.TagOffset)
+ List.setTagOffset(*DwarfExpr.TagOffset);
}
void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
@@ -2296,7 +2361,7 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
Asm->OutStreamer->EmitLabel(Holder.getRnglistsTableBaseSym());
for (const RangeSpanList &List : Holder.getRangeLists())
- Asm->EmitLabelDifference(List.getSym(), Holder.getRnglistsTableBaseSym(),
+ Asm->EmitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(),
4);
return TableEnd;
@@ -2313,12 +2378,13 @@ static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
const auto &DebugLocs = DD.getDebugLocs();
- // FIXME: Generate the offsets table and use DW_FORM_loclistx with the
- // DW_AT_loclists_base attribute. Until then set the number of offsets to 0.
Asm->OutStreamer->AddComment("Offset entry count");
- Asm->emitInt32(0);
+ Asm->emitInt32(DebugLocs.getLists().size());
Asm->OutStreamer->EmitLabel(DebugLocs.getSym());
+ for (const auto &List : DebugLocs.getLists())
+ Asm->EmitLabelDifference(List.Label, DebugLocs.getSym(), 4);
+
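+ // The header is thus followed by one 4-byte offset per list, each measured
+ // from loclists_table_base, which is what DW_FORM_loclistx references are
+ // resolved against (illustrative summary of the layout emitted above).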
return TableEnd;
}
@@ -2418,32 +2484,27 @@ static void emitRangeList(
}
}
+// Handles emission of both the debug_loclists and debug_loclists.dwo sections.
static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm, const DebugLocStream::List &List) {
- emitRangeList(
- DD, Asm, List.Label, DD.getDebugLocs().getEntries(List), *List.CU,
- dwarf::DW_LLE_base_addressx, dwarf::DW_LLE_offset_pair,
- dwarf::DW_LLE_startx_length, dwarf::DW_LLE_end_of_list,
- llvm::dwarf::LocListEncodingString,
- /* ShouldUseBaseAddress */ true,
- [&](const DebugLocStream::Entry &E) {
- DD.emitDebugLocEntryLocation(E, List.CU);
- });
+ emitRangeList(DD, Asm, List.Label, DD.getDebugLocs().getEntries(List),
+ *List.CU, dwarf::DW_LLE_base_addressx,
+ dwarf::DW_LLE_offset_pair, dwarf::DW_LLE_startx_length,
+ dwarf::DW_LLE_end_of_list, llvm::dwarf::LocListEncodingString,
+ /* ShouldUseBaseAddress */ true,
+ [&](const DebugLocStream::Entry &E) {
+ DD.emitDebugLocEntryLocation(E, List.CU);
+ });
}
-// Emit locations into the .debug_loc/.debug_rnglists section.
-void DwarfDebug::emitDebugLoc() {
+void DwarfDebug::emitDebugLocImpl(MCSection *Sec) {
if (DebugLocs.getLists().empty())
return;
+ Asm->OutStreamer->SwitchSection(Sec);
+
MCSymbol *TableEnd = nullptr;
- if (getDwarfVersion() >= 5) {
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLoclistsSection());
+ if (getDwarfVersion() >= 5)
TableEnd = emitLoclistsTableHeader(Asm, *this);
- } else {
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLocSection());
- }
for (const auto &List : DebugLocs.getLists())
emitLocList(*this, Asm, List);
@@ -2452,11 +2513,28 @@ void DwarfDebug::emitDebugLoc() {
Asm->OutStreamer->EmitLabel(TableEnd);
}
+// Emit locations into the .debug_loc/.debug_loclists section.
+void DwarfDebug::emitDebugLoc() {
+ emitDebugLocImpl(
+ getDwarfVersion() >= 5
+ ? Asm->getObjFileLowering().getDwarfLoclistsSection()
+ : Asm->getObjFileLowering().getDwarfLocSection());
+}
+
+// Emit locations into the .debug_loc.dwo/.debug_loclists.dwo section.
void DwarfDebug::emitDebugLocDWO() {
+ if (getDwarfVersion() >= 5) {
+ emitDebugLocImpl(
+ Asm->getObjFileLowering().getDwarfLoclistsDWOSection());
+
+ return;
+ }
+
for (const auto &List : DebugLocs.getLists()) {
Asm->OutStreamer->SwitchSection(
Asm->getObjFileLowering().getDwarfLocDWOSection());
Asm->OutStreamer->EmitLabel(List.Label);
+
for (const auto &Entry : DebugLocs.getEntries(List)) {
// GDB only supports startx_length in pre-standard split-DWARF.
// (in v5 standard loclists, it currently* /only/ supports base_address +
@@ -2468,8 +2546,9 @@ void DwarfDebug::emitDebugLocDWO() {
Asm->emitInt8(dwarf::DW_LLE_startx_length);
unsigned idx = AddrPool.getIndex(Entry.Begin);
Asm->EmitULEB128(idx);
+ // The pre-standard encoding also differs slightly, emitting the length as
+ // a fixed 4-byte value here, whereas it's a ULEB128 in DWARF v5 loclists.
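+ // Illustrative entry layout: a DW_LLE_startx_length opcode, a ULEB128
+ // address-pool index, the 4-byte length, then the size-prefixed expression.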
Asm->EmitLabelDifference(Entry.End, Entry.Begin, 4);
-
emitDebugLocEntryLocation(Entry, List.CU);
}
Asm->emitInt8(dwarf::DW_LLE_end_of_list);
@@ -2639,19 +2718,33 @@ void DwarfDebug::emitDebugARanges() {
/// Emit a single range list. We handle both DWARF v5 and earlier.
static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm,
const RangeSpanList &List) {
- emitRangeList(DD, Asm, List.getSym(), List.getRanges(), List.getCU(),
+ emitRangeList(DD, Asm, List.Label, List.Ranges, *List.CU,
dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair,
dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list,
llvm::dwarf::RangeListEncodingString,
- List.getCU().getCUNode()->getRangesBaseAddress() ||
+ List.CU->getCUNode()->getRangesBaseAddress() ||
DD.getDwarfVersion() >= 5,
[](auto) {});
}
-static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm,
- const DwarfFile &Holder, MCSymbol *TableEnd) {
+void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section) {
+ if (Holder.getRangeLists().empty())
+ return;
+
+ assert(useRangesSection());
+ assert(!CUMap.empty());
+ assert(llvm::any_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
+ return !Pair.second->getCUNode()->isDebugDirectivesOnly();
+ }));
+
+ Asm->OutStreamer->SwitchSection(Section);
+
+ MCSymbol *TableEnd = nullptr;
+ if (getDwarfVersion() >= 5)
+ TableEnd = emitRnglistsTableHeader(Asm, Holder);
+
for (const RangeSpanList &List : Holder.getRangeLists())
- emitRangeList(DD, Asm, List);
+ emitRangeList(*this, Asm, List);
if (TableEnd)
Asm->OutStreamer->EmitLabel(TableEnd);
@@ -2660,55 +2753,17 @@ static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm,
/// Emit address ranges into the .debug_ranges section or into the DWARF v5
/// .debug_rnglists section.
void DwarfDebug::emitDebugRanges() {
- if (CUMap.empty())
- return;
-
const auto &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
- if (Holder.getRangeLists().empty())
- return;
-
- assert(useRangesSection());
- assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
- return Pair.second->getCUNode()->isDebugDirectivesOnly();
- }));
-
- // Start the dwarf ranges section.
- MCSymbol *TableEnd = nullptr;
- if (getDwarfVersion() >= 5) {
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfRnglistsSection());
- TableEnd = emitRnglistsTableHeader(Asm, Holder);
- } else
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfRangesSection());
-
- emitDebugRangesImpl(*this, Asm, Holder, TableEnd);
+ emitDebugRangesImpl(Holder,
+ getDwarfVersion() >= 5
+ ? Asm->getObjFileLowering().getDwarfRnglistsSection()
+ : Asm->getObjFileLowering().getDwarfRangesSection());
}
void DwarfDebug::emitDebugRangesDWO() {
- assert(useSplitDwarf());
-
- if (CUMap.empty())
- return;
-
- const auto &Holder = InfoHolder;
-
- if (Holder.getRangeLists().empty())
- return;
-
- assert(getDwarfVersion() >= 5);
- assert(useRangesSection());
- assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
- return Pair.second->getCUNode()->isDebugDirectivesOnly();
- }));
-
- // Start the dwarf ranges section.
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
- MCSymbol *TableEnd = emitRnglistsTableHeader(Asm, Holder);
-
- emitDebugRangesImpl(*this, Asm, Holder, TableEnd);
+ emitDebugRangesImpl(InfoHolder,
+ Asm->getObjFileLowering().getDwarfRnglistsDWOSection());
}
void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) {
@@ -2745,35 +2800,30 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) {
Asm->EmitULEB128(dwarf::DW_MACINFO_end_file);
}
-/// Emit macros into a debug macinfo section.
-void DwarfDebug::emitDebugMacinfo() {
- if (CUMap.empty())
- return;
-
- if (llvm::all_of(CUMap, [](const decltype(CUMap)::value_type &Pair) {
- return Pair.second->getCUNode()->isDebugDirectivesOnly();
- }))
- return;
-
- // Start the dwarf macinfo section.
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfMacinfoSection());
-
+void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) {
for (const auto &P : CUMap) {
auto &TheCU = *P.second;
- if (TheCU.getCUNode()->isDebugDirectivesOnly())
- continue;
auto *SkCU = TheCU.getSkeleton();
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
auto *CUNode = cast<DICompileUnit>(P.first);
DIMacroNodeArray Macros = CUNode->getMacros();
- if (!Macros.empty()) {
- Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin());
- handleMacroNodes(Macros, U);
- }
+ if (Macros.empty())
+ continue;
+ Asm->OutStreamer->SwitchSection(Section);
+ Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin());
+ handleMacroNodes(Macros, U);
+ Asm->OutStreamer->AddComment("End Of Macro List Mark");
+ Asm->emitInt8(0);
}
- Asm->OutStreamer->AddComment("End Of Macro List Mark");
- Asm->emitInt8(0);
+}
+
+/// Emit macros into a debug macinfo section.
+void DwarfDebug::emitDebugMacinfo() {
+ emitDebugMacinfoImpl(Asm->getObjFileLowering().getDwarfMacinfoSection());
+}
+
+void DwarfDebug::emitDebugMacinfoDWO() {
+ emitDebugMacinfoImpl(Asm->getObjFileLowering().getDwarfMacinfoDWOSection());
}
// DWARF5 Experimental Separate Dwarf emitters.
@@ -2792,7 +2842,8 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
- CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
+ CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder,
+ UnitKind::Skeleton);
DwarfCompileUnit &NewCU = *OwnedUnit;
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index c8c511f67c2a..fd82b1f98055 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -118,6 +118,9 @@ public:
class DbgVariable : public DbgEntity {
/// Offset in DebugLocs.
unsigned DebugLocListIndex = ~0u;
+ /// DW_OP_LLVM_tag_offset value from DebugLocs.
+ Optional<uint8_t> DebugLocListTagOffset;
+
/// Single value location description.
std::unique_ptr<DbgValueLoc> ValueLoc = nullptr;
@@ -174,6 +177,8 @@ public:
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
+ void setDebugLocListTagOffset(uint8_t O) { DebugLocListTagOffset = O; }
+ Optional<uint8_t> getDebugLocListTagOffset() const { return DebugLocListTagOffset; }
StringRef getName() const { return getVariable()->getName(); }
const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); }
/// Get the FI entries, sorted by fragment offset.
@@ -437,6 +442,9 @@ class DwarfDebug : public DebugHandlerBase {
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
+ /// Construct a DIE for the subprogram definition \p SP and return it.
+ DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP);
+
/// Construct DIEs for call site entries describing the calls in \p MF.
void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
DIE &ScopeDIE, const MachineFunction &MF);
@@ -498,15 +506,21 @@ class DwarfDebug : public DebugHandlerBase {
/// Emit variable locations into a debug loc dwo section.
void emitDebugLocDWO();
+ void emitDebugLocImpl(MCSection *Sec);
+
/// Emit address ranges into a debug aranges section.
void emitDebugARanges();
/// Emit address ranges into a debug ranges section.
void emitDebugRanges();
void emitDebugRangesDWO();
+ void emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section);
/// Emit macros into a debug macinfo section.
void emitDebugMacinfo();
+ /// Emit macros into a debug macinfo.dwo section.
+ void emitDebugMacinfoDWO();
+ void emitDebugMacinfoImpl(MCSection *Section);
void emitMacro(DIMacro &M);
void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U);
void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 1c5a244d7c5d..310647f15a5e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -155,20 +155,18 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
CurSubReg.set(Offset, Offset + Size);
// If this sub-register has a DWARF number and we haven't covered
- // its range, emit a DWARF piece for it.
- if (CurSubReg.test(Coverage)) {
+ // its range, and it overlaps the value, emit a DWARF piece for it.
+ if (Offset < MaxSize && CurSubReg.test(Coverage)) {
// Emit a piece for any gap in the coverage.
if (Offset > CurPos)
- DwarfRegs.push_back({-1, Offset - CurPos, "no DWARF register encoding"});
+ DwarfRegs.push_back(
+ {-1, Offset - CurPos, "no DWARF register encoding"});
DwarfRegs.push_back(
{Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"});
- if (Offset >= MaxSize)
- break;
-
- // Mark it as emitted.
- Coverage.set(Offset, Offset + Size);
- CurPos = Offset + Size;
}
+ // Mark it as emitted.
+ Coverage.set(Offset, Offset + Size);
+ CurPos = Offset + Size;
}
// Failed to find any DWARF encoding.
if (CurPos == 0)
@@ -246,8 +244,8 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// a call site parameter expression and if that expression is just a register
// location, emit it with addBReg and offset 0, because we should emit a DWARF
// expression representing a value, rather than a location.
- if (!isMemoryLocation() && !HasComplexExpression &&
- (!isParameterValue() || isEntryValue())) {
+ if (!isMemoryLocation() && !HasComplexExpression && (!isParameterValue() ||
+ isEntryValue())) {
for (auto &Reg : DwarfRegs) {
if (Reg.DwarfRegNo >= 0)
addReg(Reg.DwarfRegNo, Reg.Comment);
@@ -391,6 +389,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
// empty DW_OP_piece / DW_OP_bit_piece before we emitted the base
// location.
assert(OffsetInBits >= FragmentOffset && "fragment offset not added?");
+ assert(SizeInBits >= OffsetInBits - FragmentOffset && "size underflow");
// If addMachineReg already emitted DW_OP_piece operations to represent
// a super-register by splicing together sub-registers, subtract the size
@@ -436,9 +435,6 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
break;
case dwarf::DW_OP_deref:
assert(!isRegisterLocation());
- // For more detailed explanation see llvm.org/PR43343.
- assert(!isParameterValue() && "Parameter entry values should not be "
- "dereferenced due to safety reasons.");
if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor))
// Turning this into a memory location description makes the deref
// implicit.
@@ -576,3 +572,11 @@ void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
emitUnsigned((1ULL << FromBits) - 1);
emitOp(dwarf::DW_OP_and);
}
+
+void DwarfExpression::addWasmLocation(unsigned Index, int64_t Offset) {
+ assert(LocationKind == Implicit || LocationKind == Unknown);
+ LocationKind = Implicit;
+ emitOp(dwarf::DW_OP_WASM_location);
+ emitUnsigned(Index);
+ emitSigned(Offset);
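+ // e.g. addWasmLocation(2, 0) emits DW_OP_WASM_location followed by the
+ // index 2 and offset 0 (illustrative values; the index meaning depends on
+ // the WebAssembly location kind).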
+}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 1ad46669f9b2..46c07b1d5b6b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -337,6 +337,10 @@ public:
void emitLegacySExt(unsigned FromBits);
void emitLegacyZExt(unsigned FromBits);
+
+ /// Emit location information expressed via a WebAssembly location + offset.
+ /// The Index is an identifier for locals, globals, or the operand stack.
+ void addWasmLocation(unsigned Index, int64_t Offset);
};
/// DwarfExpression implementation for .debug_loc entries.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index e3c9095d1343..e5c4db58f477 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -126,6 +126,6 @@ void DwarfFile::addScopeLabel(LexicalScope *LS, DbgLabel *Label) {
std::pair<uint32_t, RangeSpanList *>
DwarfFile::addRange(const DwarfCompileUnit &CU, SmallVector<RangeSpan, 2> R) {
CURangeLists.push_back(
- RangeSpanList(Asm->createTempSymbol("debug_ranges"), CU, std::move(R)));
+ RangeSpanList{Asm->createTempSymbol("debug_ranges"), &CU, std::move(R)});
return std::make_pair(CURangeLists.size() - 1, &CURangeLists.back());
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 35fa51fb24c4..cf293d7534d0 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -37,21 +37,12 @@ struct RangeSpan {
const MCSymbol *End;
};
-class RangeSpanList {
-private:
+struct RangeSpanList {
// Index for locating within the debug_range section this particular span.
- MCSymbol *RangeSym;
+ MCSymbol *Label;
const DwarfCompileUnit *CU;
// List of ranges.
SmallVector<RangeSpan, 2> Ranges;
-
-public:
- RangeSpanList(MCSymbol *Sym, const DwarfCompileUnit &CU,
- SmallVector<RangeSpan, 2> Ranges)
- : RangeSym(Sym), CU(&CU), Ranges(std::move(Ranges)) {}
- MCSymbol *getSym() const { return RangeSym; }
- const DwarfCompileUnit &getCU() const { return *CU; }
- const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
};
class DwarfFile {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 37c68c085792..1aba956c48de 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -188,8 +188,9 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
/// Check whether the DIE for this MDNode can be shared across CUs.
bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
- // When the MDNode can be part of the type system, the DIE can be shared
- // across CUs.
+ // When the MDNode can be part of the type system (this includes subprogram
+ // declarations *and* subprogram definitions, even local definitions), the
+ // DIE must be shared across CUs.
// Combining type units and cross-CU DIE sharing is lower value (since
// cross-CU DIE sharing is used in LTO and removes type redundancy at that
// level already) but may be implementable for some value in projects
@@ -197,9 +198,7 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
// together.
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return false;
- return (isa<DIType>(D) ||
- (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
- !DD->generateTypeUnits();
+ return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits();
}
DIE *DwarfUnit::getDIE(const DINode *D) const {
@@ -800,6 +799,15 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
+ // If alignment is specified for a typedef, create and insert a
+ // DW_AT_alignment attribute on the DW_TAG_typedef DIE.
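+ // e.g. "typedef int AlignedInt __attribute__((aligned(8)));" would get a
+ // DW_AT_alignment of 8 under DWARF v5 (illustrative).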
+ if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) {
+ uint32_t AlignInBytes = DTy->getAlignInBytes();
+ if (AlignInBytes > 0)
+ addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+ }
+
// Add size if non-zero (derived types might be zero-sized.)
if (Size && Tag != dwarf::DW_TAG_pointer_type
&& Tag != dwarf::DW_TAG_ptr_to_member_type
@@ -1114,8 +1122,8 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) {
M->getConfigurationMacros());
if (!M->getIncludePath().empty())
addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath());
- if (!M->getISysRoot().empty())
- addString(MDie, dwarf::DW_AT_LLVM_isysroot, M->getISysRoot());
+ if (!M->getSysRoot().empty())
+ addString(MDie, dwarf::DW_AT_LLVM_sysroot, M->getSysRoot());
return &MDie;
}
@@ -1224,6 +1232,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
Language == dwarf::DW_LANG_ObjC))
addFlag(SPDie, dwarf::DW_AT_prototyped);
+ if (SP->isObjCDirect())
+ addFlag(SPDie, dwarf::DW_AT_APPLE_objc_direct);
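+ // e.g. an Objective-C method declared with __attribute__((objc_direct))
+ // (illustrative; mirrors the DISubprogram's isObjCDirect flag).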
+
unsigned CC = 0;
DITypeRefArray Args;
if (const DISubroutineType *SPTy = SP->getType()) {
@@ -1307,6 +1318,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
addFlag(SPDie, dwarf::DW_AT_elemental);
if (SP->isRecursive())
addFlag(SPDie, dwarf::DW_AT_recursive);
+
+ if (DD->getDwarfVersion() >= 5 && SP->isDeleted())
+ addFlag(SPDie, dwarf::DW_AT_deleted);
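+ // e.g. a C++ special member function declared "= delete" (illustrative).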
}
void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index 290be81c6baa..914308d9147e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains support for writing Win64 exception info into asm files.
+// This file contains support for writing the metadata for Windows Control
+// Flow Guard, including address-taken functions and valid longjmp targets.
//
//===----------------------------------------------------------------------===//
@@ -17,6 +18,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -29,16 +31,69 @@ WinCFGuard::WinCFGuard(AsmPrinter *A) : AsmPrinterHandler(), Asm(A) {}
WinCFGuard::~WinCFGuard() {}
+void WinCFGuard::endFunction(const MachineFunction *MF) {
+
+ // Skip functions without any longjmp targets.
+ if (MF->getLongjmpTargets().empty())
+ return;
+
+ // Copy the function's longjmp targets to a module-level list.
+ LongjmpTargets.insert(LongjmpTargets.end(), MF->getLongjmpTargets().begin(),
+ MF->getLongjmpTargets().end());
+}
+
+/// Returns true if this function's address is escaped in a way that might make
+/// it an indirect call target. Function::hasAddressTaken gives different
+/// results when a function is called directly with a function prototype
+/// mismatch, which requires a cast.
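+///
+/// For example (illustrative IR, not part of this patch):
+///   store void ()* @f, void ()** @fp                  ; escapes @f
+///   call void bitcast (void (i32)* @g to void ()*)()  ; direct call, no escape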
+static bool isPossibleIndirectCallTarget(const Function *F) {
+ SmallVector<const Value *, 4> Users{F};
+ while (!Users.empty()) {
+ const Value *FnOrCast = Users.pop_back_val();
+ for (const Use &U : FnOrCast->uses()) {
+ const User *FnUser = U.getUser();
+ if (isa<BlockAddress>(FnUser))
+ continue;
+ if (const auto *Call = dyn_cast<CallBase>(FnUser)) {
+ if (!Call->isCallee(&U))
+ return true;
+ } else if (isa<Instruction>(FnUser)) {
+ // Consider any other instruction to be an escape. This has some weird
+ // consequences like no-op intrinsics being an escape or a store *to* a
+ // function address being an escape.
+ return true;
+ } else if (const auto *C = dyn_cast<Constant>(FnUser)) {
+ // If this is a constant pointer cast of the function, don't consider
+ // this escape. Analyze the uses of the cast as well. This ensures that
+ // direct calls with mismatched prototypes don't end up in the CFG
+ // table. Consider other constants, such as vtable initializers, to
+ // escape the function.
+ if (C->stripPointerCasts() == F)
+ Users.push_back(FnUser);
+ else
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
void WinCFGuard::endModule() {
const Module *M = Asm->MMI->getModule();
std::vector<const Function *> Functions;
for (const Function &F : *M)
- if (F.hasAddressTaken())
+ if (isPossibleIndirectCallTarget(&F))
Functions.push_back(&F);
- if (Functions.empty())
+ if (Functions.empty() && LongjmpTargets.empty())
return;
auto &OS = *Asm->OutStreamer;
OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection());
for (const Function *F : Functions)
OS.EmitCOFFSymbolIndex(Asm->getSymbol(F));
+
+ // Emit the symbol index of each longjmp target.
+ OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection());
+ for (const MCSymbol *S : LongjmpTargets) {
+ OS.EmitCOFFSymbolIndex(S);
+ }
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
index def0a59ab007..494a153b05ba 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
+++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains support for writing windows exception info into asm files.
+// This file contains support for writing the metadata for Windows Control
+// Flow Guard, including address-taken functions and valid longjmp targets.
//
//===----------------------------------------------------------------------===//
@@ -15,12 +16,14 @@
#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/Support/Compiler.h"
+#include <vector>
namespace llvm {
class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler {
/// Target of directive emission.
AsmPrinter *Asm;
+ std::vector<const MCSymbol *> LongjmpTargets;
public:
WinCFGuard(AsmPrinter *A);
@@ -28,7 +31,7 @@ public:
void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
- /// Emit the Control Flow Guard function ID table
+ /// Emit the Control Flow Guard function ID table.
void endModule() override;
/// Gather pre-function debug information.
@@ -39,7 +42,7 @@ public:
/// Gather post-function debug information.
/// Please note that some AsmPrinter implementations may not call
/// beginFunction at all.
- void endFunction(const MachineFunction *MF) override {}
+ void endFunction(const MachineFunction *MF) override;
/// Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override {}
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 27b298dcf6af..37a50cde6391 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
@@ -674,7 +675,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
Ret.Mask = Builder.CreateShl(
- ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
+ ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt,
"Mask");
Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 455916eeb82f..4b9c50aeb1d3 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -38,6 +39,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -46,6 +48,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -102,6 +105,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -128,7 +132,8 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
BranchFolder::MBFIWrapper MBBFreqInfo(
getAnalysis<MachineBlockFrequencyInfo>());
BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
- getAnalysis<MachineBranchProbabilityInfo>());
+ getAnalysis<MachineBranchProbabilityInfo>(),
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
return Folder.OptimizeFunction(
MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(),
@@ -138,9 +143,10 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
MBFIWrapper &FreqInfo,
const MachineBranchProbabilityInfo &ProbInfo,
+ ProfileSummaryInfo *PSI,
unsigned MinTailLength)
: EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
- MBBFreqInfo(FreqInfo), MBPI(ProbInfo) {
+ MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) {
if (MinCommonTailLength == 0)
MinCommonTailLength = TailMergeSize;
switch (FlagEnableTailMerge) {
@@ -301,113 +307,56 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
return HashMachineInstr(*I);
}
-/// Whether MI should be counted as an instruction when calculating common tail.
+/// Whether MI should be counted as an instruction when calculating common tail.
static bool countsAsInstruction(const MachineInstr &MI) {
return !(MI.isDebugInstr() || MI.isCFIInstruction());
}
-/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
-/// of instructions they actually have in common together at their end. Return
-/// iterators for the first shared instruction in each block.
+/// Iterate backwards from the given iterator \p I, towards the beginning of the
+/// block. If a MI satisfying 'countsAsInstruction' is found, return an iterator
+/// pointing to that MI. If no such MI is found, return the end iterator.
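+///
+/// For example, scanning back over "ADD; DBG_VALUE; CFI_INSTRUCTION" from the
+/// block end stops at ADD; a block containing only debug and CFI instructions
+/// yields MBB->end() (illustrative).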
+static MachineBasicBlock::iterator
+skipBackwardPastNonInstructions(MachineBasicBlock::iterator I,
+ MachineBasicBlock *MBB) {
+ while (I != MBB->begin()) {
+ --I;
+ if (countsAsInstruction(*I))
+ return I;
+ }
+ return MBB->end();
+}
+
+/// Given two machine basic blocks, return the number of instructions they
+/// actually have in common at their end. If a common tail of at least one
+/// instruction is found, iterators for the first shared instruction in each
+/// block are returned as well.
+///
+/// Non-instructions according to countsAsInstruction are ignored.
static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2,
MachineBasicBlock::iterator &I1,
MachineBasicBlock::iterator &I2) {
- I1 = MBB1->end();
- I2 = MBB2->end();
+ MachineBasicBlock::iterator MBBI1 = MBB1->end();
+ MachineBasicBlock::iterator MBBI2 = MBB2->end();
unsigned TailLen = 0;
- while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
- --I1; --I2;
- // Skip debugging pseudos; necessary to avoid changing the code.
- while (!countsAsInstruction(*I1)) {
- if (I1==MBB1->begin()) {
- while (!countsAsInstruction(*I2)) {
- if (I2==MBB2->begin()) {
- // I1==DBG at begin; I2==DBG at begin
- goto SkipTopCFIAndReturn;
- }
- --I2;
- }
- ++I2;
- // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
- goto SkipTopCFIAndReturn;
- }
- --I1;
- }
- // I1==first (untested) non-DBG preceding known match
- while (!countsAsInstruction(*I2)) {
- if (I2==MBB2->begin()) {
- ++I1;
- // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
- goto SkipTopCFIAndReturn;
- }
- --I2;
- }
- // I1, I2==first (untested) non-DBGs preceding known match
- if (!I1->isIdenticalTo(*I2) ||
+ while (true) {
+ MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1);
+ MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2);
+ if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end())
+ break;
+ if (!MBBI1->isIdenticalTo(*MBBI2) ||
// FIXME: This check is dubious. It's used to get around a problem where
// people incorrectly expect inline asm directives to remain in the same
// relative order. This is untenable because normal compiler
// optimizations (like this one) may reorder and/or merge these
// directives.
- I1->isInlineAsm()) {
- ++I1; ++I2;
+ MBBI1->isInlineAsm()) {
break;
}
++TailLen;
- }
- // Back past possible debugging pseudos at beginning of block. This matters
- // when one block differs from the other only by whether debugging pseudos
- // are present at the beginning. (This way, the various checks later for
- // I1==MBB1->begin() work as expected.)
- if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
- --I2;
- while (I2->isDebugInstr()) {
- if (I2 == MBB2->begin())
- return TailLen;
- --I2;
- }
- ++I2;
- }
- if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
- --I1;
- while (I1->isDebugInstr()) {
- if (I1 == MBB1->begin())
- return TailLen;
- --I1;
- }
- ++I1;
- }
-
-SkipTopCFIAndReturn:
- // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if
- // I1 and I2 are non-identical when compared and then one or both of them ends
- // up pointing to a CFI instruction after being incremented. For example:
- /*
- BB1:
- ...
- INSTRUCTION_A
- ADD32ri8 <- last common instruction
- ...
- BB2:
- ...
- INSTRUCTION_B
- CFI_INSTRUCTION
- ADD32ri8 <- last common instruction
- ...
- */
- // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after
- // incrementing the iterators, I1 will point to ADD, however I2 will point to
- // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the
- // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI
- // instruction.
- while (I1 != MBB1->end() && I1->isCFIInstruction()) {
- ++I1;
- }
-
- while (I2 != MBB2->end() && I2->isCFIInstruction()) {
- ++I2;
+ I1 = MBBI1;
+ I2 = MBBI2;
}
return TailLen;
@@ -500,7 +449,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
continue;
if (I->isCall())
Time += 10;
- else if (I->mayLoad() || I->mayStore())
+ else if (I->mayLoadOrStore())
Time += 2;
else
++Time;
@@ -641,7 +590,9 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB,
DenseMap<const MachineBasicBlock *, int> &EHScopeMembership,
- bool AfterPlacement) {
+ bool AfterPlacement,
+ BranchFolder::MBFIWrapper &MBBFreqInfo,
+ ProfileSummaryInfo *PSI) {
// It is never profitable to tail-merge blocks from two different EH scopes.
if (!EHScopeMembership.empty()) {
auto EHScope1 = EHScopeMembership.find(MBB1);
@@ -659,6 +610,17 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
<< " and " << printMBBReference(*MBB2) << " is "
<< CommonTailLen << '\n');
+ // If only debug instructions precede the tail in either block, move that
+ // iterator back to the start of its MBB. This avoids splitting a block
+ // merely because of leading debug instructions (keeping the output
+ // invariant under -g).
+ if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end()) == I1)
+ I1 = MBB1->begin();
+ if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end()) == I2)
+ I2 = MBB2->begin();
+
+ bool FullBlockTail1 = I1 == MBB1->begin();
+ bool FullBlockTail2 = I2 == MBB2->begin();
+
// It's almost always profitable to merge any number of non-terminator
// instructions with the block that falls through into the common successor.
// This is true only for a single successor. For multiple successors, we are
@@ -677,7 +639,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// are unlikely to become a fallthrough target after machine block placement.
// Tail merging these blocks is unlikely to create additional unconditional
// branches, and will reduce the size of this cold code.
- if (I1 == MBB1->begin() && I2 == MBB2->begin() &&
+ if (FullBlockTail1 && FullBlockTail2 &&
blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))
return true;
@@ -685,16 +647,16 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// a position where the other could fall through into it, merge any number
// of instructions, because it can be done without a branch.
// TODO: If the blocks are not adjacent, move one of them so that they are?
- if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+ if (MBB1->isLayoutSuccessor(MBB2) && FullBlockTail2)
return true;
- if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+ if (MBB2->isLayoutSuccessor(MBB1) && FullBlockTail1)
return true;
// If both blocks are identical and end in a branch, merge them unless they
// both have a fallthrough predecessor and successor.
// We can only do this after block placement because it depends on whether
// there are fallthroughs, and we don't know until after layout.
- if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) {
+ if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {
auto BothFallThrough = [](MachineBasicBlock *MBB) {
if (MBB->succ_size() != 0 && !MBB->canFallThrough())
return false;
@@ -727,8 +689,12 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// branch instruction, which is likely to be smaller than the 2
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
- return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
- (I1 == MBB1->begin() || I2 == MBB2->begin());
+ bool OptForSize =
+ MF->getFunction().hasOptSize() ||
+ (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) &&
+ llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI()));
+ return EffectiveTailLen >= 2 && OptForSize &&
+ (FullBlockTail1 || FullBlockTail2);
}
unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
@@ -749,7 +715,7 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
CommonTailLen, TrialBBI1, TrialBBI2,
SuccBB, PredBB,
EHScopeMembership,
- AfterBlockPlacement)) {
+ AfterBlockPlacement, MBBFreqInfo, PSI)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
@@ -869,7 +835,7 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!");
// Merge MMOs from memory operations in the common block.
- if (MBBICommon->mayLoad() || MBBICommon->mayStore())
+ if (MBBICommon->mayLoadOrStore())
MBBICommon->cloneMergedMemRefs(*MBB->getParent(), {&*MBBICommon, &*MBBI});
// Drop undef flags if they aren't present in all merged instructions.
for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {
@@ -1579,8 +1545,10 @@ ReoptimizeBlock:
}
}
- if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
- MF.getFunction().hasOptSize()) {
+ bool OptForSize =
+ MF.getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI());
+ if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
// direction, thereby defeating careful block placement and regressing
// performance. Therefore, only consider this for optsize functions.
diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h
index 761ff9c7d54e..7a4c68ea09f5 100644
--- a/llvm/lib/CodeGen/BranchFolding.h
+++ b/llvm/lib/CodeGen/BranchFolding.h
@@ -27,6 +27,7 @@ class MachineFunction;
class MachineLoopInfo;
class MachineModuleInfo;
class MachineRegisterInfo;
+class ProfileSummaryInfo;
class raw_ostream;
class TargetInstrInfo;
class TargetRegisterInfo;
@@ -39,6 +40,7 @@ class TargetRegisterInfo;
bool CommonHoist,
MBFIWrapper &FreqInfo,
const MachineBranchProbabilityInfo &ProbInfo,
+ ProfileSummaryInfo *PSI,
// Min tail length to merge. Defaults to commandline
// flag. Ignored for optsize.
unsigned MinTailLength = 0);
@@ -145,6 +147,7 @@ class TargetRegisterInfo;
const BlockFrequency Freq) const;
void view(const Twine &Name, bool isSimple = true);
uint64_t getEntryFreq() const;
+ const MachineBlockFrequencyInfo &getMBFI() { return MBFI; }
private:
const MachineBlockFrequencyInfo &MBFI;
@@ -154,6 +157,7 @@ class TargetRegisterInfo;
private:
MBFIWrapper &MBBFreqInfo;
const MachineBranchProbabilityInfo &MBPI;
+ ProfileSummaryInfo *PSI;
bool TailMergeBlocks(MachineFunction &MF);
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 6efdc9efa968..f05517d178ae 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 709164e5f178..9bae9d36add1 100644
--- a/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -19,13 +19,13 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
-
using namespace llvm;
namespace llvm {
diff --git a/llvm/lib/CodeGen/CFGuardLongjmp.cpp b/llvm/lib/CodeGen/CFGuardLongjmp.cpp
new file mode 100644
index 000000000000..c3bf93855111
--- /dev/null
+++ b/llvm/lib/CodeGen/CFGuardLongjmp.cpp
@@ -0,0 +1,120 @@
+//===-- CFGuardLongjmp.cpp - Longjmp symbols for CFGuard --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains a machine function pass to insert a symbol after each
+/// call to _setjmp and store this in the MachineFunction's LongjmpTargets
+/// vector. This will be used to emit the table of valid longjmp targets used
+/// by Control Flow Guard.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "cfguard-longjmp"
+
+STATISTIC(CFGuardLongjmpTargets,
+ "Number of Control Flow Guard longjmp targets");
+
+namespace {
+
+/// MachineFunction pass to insert a symbol after each call to _setjmp and store
+/// this in the MachineFunction's LongjmpTargets vector.
+class CFGuardLongjmp : public MachineFunctionPass {
+public:
+ static char ID;
+
+ CFGuardLongjmp() : MachineFunctionPass(ID) {
+ initializeCFGuardLongjmpPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Control Flow Guard longjmp targets";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+char CFGuardLongjmp::ID = 0;
+
+INITIALIZE_PASS(CFGuardLongjmp, "CFGuardLongjmp",
+ "Insert symbols at valid longjmp targets for /guard:cf", false,
+ false)
+FunctionPass *llvm::createCFGuardLongjmpPass() { return new CFGuardLongjmp(); }
+
+bool CFGuardLongjmp::runOnMachineFunction(MachineFunction &MF) {
+
+ // Skip modules for which the cfguard flag is not set.
+ if (!MF.getMMI().getModule()->getModuleFlag("cfguard"))
+ return false;
+
+ // Skip functions that do not have calls to _setjmp.
+ if (!MF.getFunction().callsFunctionThatReturnsTwice())
+ return false;
+
+ SmallVector<MachineInstr *, 8> SetjmpCalls;
+
+ // Iterate over all instructions in the function and add calls to functions
+ // that return twice to the list of targets.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+
+ // Skip instructions that are not calls.
+ if (!MI.isCall() || MI.getNumOperands() < 1)
+ continue;
+
+ // Iterate over operands to find calls to global functions.
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isGlobal())
+ continue;
+
+ auto *F = dyn_cast<Function>(MO.getGlobal());
+ if (!F)
+ continue;
+
+ // If the instruction calls a function that returns twice, add
+ // it to the list of targets.
+ if (F->hasFnAttribute(Attribute::ReturnsTwice)) {
+ SetjmpCalls.push_back(&MI);
+ break;
+ }
+ }
+ }
+ }
+
+ if (SetjmpCalls.empty())
+ return false;
+
+ unsigned SetjmpNum = 0;
+
+ // For each possible target, create a new symbol and insert it immediately
+ // after the call to setjmp. Add this symbol to the MachineFunction's list
+ // of longjmp targets.
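+ // e.g. the second setjmp call in function "main" gets the post-call symbol
+ // "$cfgsj_main1" (illustrative; see the SymbolName construction below).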
+ for (MachineInstr *Setjmp : SetjmpCalls) {
+ SmallString<128> SymbolName;
+ raw_svector_ostream(SymbolName) << "$cfgsj_" << MF.getName() << SetjmpNum++;
+ MCSymbol *SjSymbol = MF.getContext().getOrCreateSymbol(SymbolName);
+
+ Setjmp->setPostInstrSymbol(MF, SjSymbol);
+ MF.addLongjmpTarget(SjSymbol);
+ CFGuardLongjmpTargets++;
+ }
+
+ return true;
+}
diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 1a4d54231cfd..ef548c84d3c0 100644
--- a/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -48,8 +49,7 @@ class CFIInstrInserter : public MachineFunctionPass {
}
bool runOnMachineFunction(MachineFunction &MF) override {
- if (!MF.getMMI().hasDebugInfo() &&
- !MF.getFunction().needsUnwindTableEntry())
+ if (!MF.needsFrameMoves())
return false;
MBBVector.resize(MF.getNumBlockIDs());
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index ad9525f927e8..20fc67cc66ae 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -22,6 +22,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
+ initializeCFGuardLongjmpPass(Registry);
initializeCFIInstrInserterPass(Registry);
initializeCodeGenPreparePass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
@@ -104,6 +105,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeTailDuplicatePass(Registry);
initializeTargetPassConfigPass(Registry);
initializeTwoAddressInstructionPassPass(Registry);
+ initializeTypePromotionPass(Registry);
initializeUnpackMachineBundlesPass(Registry);
initializeUnreachableBlockElimLegacyPassPass(Registry);
initializeUnreachableMachineBlockElimPass(Registry);
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index fa4432ea23ec..f05afd058746 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -30,7 +30,6 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
@@ -61,6 +60,8 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
@@ -73,6 +74,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
@@ -88,7 +90,9 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -222,6 +226,10 @@ static cl::opt<bool>
cl::init(true),
cl::desc("Enable splitting large offset of GEP."));
+static cl::opt<bool> EnableICMP_EQToICMP_ST(
+ "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
+ cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
+
namespace {
enum ExtType {
@@ -251,6 +259,7 @@ class TypePromotionTransaction;
const LoopInfo *LI;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
+ ProfileSummaryInfo *PSI;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
@@ -293,7 +302,7 @@ class TypePromotionTransaction;
/// Keep track of SExt promoted.
ValueToSExts ValToSExtendedUses;
- /// True if optimizing for size.
+ /// True if the function has the OptSize attribute.
bool OptSize;
/// DataLayout for the Function being processed.
@@ -370,6 +379,7 @@ class TypePromotionTransaction;
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
+ bool fixupDbgValue(Instruction *I);
bool placeDbgValues(Function &F);
bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
@@ -429,10 +439,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
BPI.reset(new BranchProbabilityInfo(F, *LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
OptSize = F.hasOptSize();
-
- ProfileSummaryInfo *PSI =
- &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (ProfileGuidedSectionPrefix) {
if (PSI->isFunctionHotInCallGraph(&F, *BFI))
F.setSectionPrefix(".hot");
@@ -451,7 +459,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// bypassSlowDivision may create new BBs, but we don't want to reapply the
// optimization to those blocks.
BasicBlock* Next = BB->getNextNode();
- EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
+ // F.hasOptSize is already checked in the outer if statement.
+ if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
+ EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
BB = Next;
}
}
@@ -1049,7 +1059,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
// Collect all the relocate calls associated with a statepoint
AllRelocateCalls.push_back(Relocate);
- // We need atleast one base pointer relocation + one derived pointer
+ // We need at least one base pointer relocation + one derived pointer
// relocation to mangle
if (AllRelocateCalls.size() < 2)
return false;
@@ -1408,6 +1418,93 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
return MadeChange;
}
+/// For pattern like:
+///
+/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
+/// ...
+/// DomBB:
+/// ...
+/// br DomCond, TrueBB, CmpBB
+/// CmpBB: (with DomBB being the single predecessor)
+/// ...
+/// Cmp = icmp eq CmpOp0, CmpOp1
+/// ...
+///
+/// On targets where icmp sgt/slt lowers differently from icmp eq (e.g.
+/// PowerPC), this pattern needs two comparisons. This function tries to
+/// convert 'Cmp = icmp eq CmpOp0, CmpOp1' into 'Cmp = icmp slt/sgt CmpOp0,
+/// CmpOp1', so DomCond and Cmp can share a single comparison.
+///
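+/// Illustrative IR (assuming DomCond is "icmp slt %a, %b" and CmpBB is on
+/// the false edge):
+///   before: %c = icmp eq i32 %a, %b  ; br i1 %c, label %eq, label %gt
+///   after:  %c = icmp sgt i32 %a, %b ; br i1 %c, label %gt, label %eq
+///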
+/// Return true if any changes are made.
+static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
+ const TargetLowering &TLI) {
+ if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
+ return false;
+
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred != ICmpInst::ICMP_EQ)
+ return false;
+
+ // If icmp eq has users other than BranchInst and SelectInst, converting it to
+ // icmp slt/sgt would introduce more redundant LLVM IR.
+ for (User *U : Cmp->users()) {
+ if (isa<BranchInst>(U))
+ continue;
+ if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
+ continue;
+ return false;
+ }
+
+ // This is a cheap/incomplete check for dominance - just match a single
+ // predecessor with a conditional branch.
+ BasicBlock *CmpBB = Cmp->getParent();
+ BasicBlock *DomBB = CmpBB->getSinglePredecessor();
+ if (!DomBB)
+ return false;
+
+ // We want to ensure that the only way control gets to the comparison of
+ // interest is that a less/greater than comparison on the same operands is
+ // false.
+ Value *DomCond;
+ BasicBlock *TrueBB, *FalseBB;
+ if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
+ return false;
+ if (CmpBB != FalseBB)
+ return false;
+
+ Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
+ ICmpInst::Predicate DomPred;
+ if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
+ return false;
+ if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
+ return false;
+
+ // Convert the equality comparison to the opposite of the dominating
+ // comparison and swap the direction for all branch/select users.
+ // We have conceptually converted:
+ // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
+ // to
+ // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
+ // And similarly for branches.
+ for (User *U : Cmp->users()) {
+ if (auto *BI = dyn_cast<BranchInst>(U)) {
+ assert(BI->isConditional() && "Must be conditional");
+ BI->swapSuccessors();
+ continue;
+ }
+ if (auto *SI = dyn_cast<SelectInst>(U)) {
+ // Swap the true/false operands and the branch-weight metadata to match.
+ SI->swapValues();
+ SI->swapProfMetadata();
+ continue;
+ }
+ llvm_unreachable("Must be a branch or a select");
+ }
+ Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
+ return true;
+}
+
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
return true;
@@ -1418,6 +1515,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
if (combineToUSubWithOverflow(Cmp, ModifiedDT))
return true;
+ if (foldICmpWithDominatingICmp(Cmp, *TLI))
+ return true;
+
return false;
}
@@ -1842,7 +1942,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// cold block. This interacts with our handling for loads and stores to
// ensure that we can fold all uses of a potential addressing computation
// into their uses. TODO: generalize this to work over profiling data
- if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+ bool OptForSize = OptSize || llvm::shouldOptimizeForSize(BB, PSI, BFI.get());
+ if (!OptForSize && CI->hasFnAttr(Attribute::Cold))
for (auto &Arg : CI->arg_operands()) {
if (!Arg->getType()->isPointerTy())
continue;
@@ -1907,6 +2008,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
case Intrinsic::ctlz:
// If counting zeros is expensive, try to avoid it.
return despeculateCountZeros(II, TLI, DL, ModifiedDT);
+ case Intrinsic::dbg_value:
+ return fixupDbgValue(II);
}
if (TLI) {
@@ -2777,16 +2880,24 @@ class AddressingModeMatcher {
/// When true, IsProfitableToFoldIntoAddressingMode always returns true.
bool IgnoreProfitability;
+ /// True if we are optimizing for size.
+ bool OptSize;
+
+ ProfileSummaryInfo *PSI;
+ BlockFrequencyInfo *BFI;
+
AddressingModeMatcher(
SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
- std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
+ bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
- PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
+ PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP),
+ OptSize(OptSize), PSI(PSI), BFI(BFI) {
IgnoreProfitability = false;
}
@@ -2804,12 +2915,14 @@ public:
const TargetLowering &TLI, const TargetRegisterInfo &TRI,
const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT,
- std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
+ std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
+ bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
ExtAddrMode Result;
bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
MemoryInst, Result, InsertedInsts,
- PromotedInsts, TPT, LargeOffsetGEP)
+ PromotedInsts, TPT, LargeOffsetGEP,
+ OptSize, PSI, BFI)
.matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
@@ -4420,7 +4533,8 @@ static bool FindAllMemoryUses(
Instruction *I,
SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
- const TargetRegisterInfo &TRI, int SeenInsts = 0) {
+ const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, int SeenInsts = 0) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -4429,8 +4543,6 @@ static bool FindAllMemoryUses(
if (!MightBeFoldableInst(I))
return true;
- const bool OptSize = I->getFunction()->hasOptSize();
-
// Loop over all the uses, recursively processing them.
for (Use &U : I->uses()) {
// Conservatively return true if we're seeing a large number or a deep chain
@@ -4471,7 +4583,9 @@ static bool FindAllMemoryUses(
if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
// If this is a cold call, we can sink the addressing calculation into
// the cold path. See optimizeCallInst
- if (!OptSize && CI->hasFnAttr(Attribute::Cold))
+ bool OptForSize = OptSize ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+ if (!OptForSize && CI->hasFnAttr(Attribute::Cold))
continue;
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
@@ -4483,8 +4597,8 @@ static bool FindAllMemoryUses(
continue;
}
- if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
- SeenInsts))
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+ PSI, BFI, SeenInsts))
return true;
}
@@ -4572,7 +4686,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// the use is just a particularly nice way of sinking it.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+ PSI, BFI))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -4608,7 +4723,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
TPT.getRestorationPoint();
AddressingModeMatcher Matcher(
MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
- InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
+ InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.matchAddr(Address, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -4714,7 +4829,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
0);
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
- InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
+ InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
+ BFI.get());
GetElementPtrInst *GEP = LargeOffsetGEP.first;
if (GEP && !NewGEPBases.count(GEP)) {
@@ -5932,7 +6048,9 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// If branch conversion isn't desirable, exit early.
- if (DisableSelectToBranch || OptSize || !TLI)
+ if (DisableSelectToBranch ||
+ OptSize || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()) ||
+ !TLI)
return false;
// Find all consecutive select instructions that share the same condition.
@@ -7110,42 +7228,68 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
return MadeChange;
}
-// llvm.dbg.value is far away from the value then iSel may not be able
-// handle it properly. iSel will drop llvm.dbg.value if it can not
-// find a node corresponding to the value.
+// Some CGP optimizations may move or alter what's computed in a block. Check
+// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
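+//
+// For example (illustrative; %sunkaddr stands for an address computation that
+// optimizeMemoryInst sank into this block):
+//   call void @llvm.dbg.value(metadata i32* %p, ...)
+// is retargeted to
+//   call void @llvm.dbg.value(metadata i8* %sunkaddr, ...)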
+bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
+ assert(isa<DbgValueInst>(I));
+ DbgValueInst &DVI = *cast<DbgValueInst>(I);
+
+ // Does this dbg.value refer to a sunk address calculation?
+ Value *Location = DVI.getVariableLocation();
+ WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
+ Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+ if (SunkAddr) {
+ // Point dbg.value at locally computed address, which should give the best
+ // opportunity to be accurately lowered. This update may change the type of
+ // pointer being referred to; however this makes no difference to debugging
+ // information, and we can't generate bitcasts that may affect codegen.
+ DVI.setOperand(0, MetadataAsValue::get(DVI.getContext(),
+ ValueAsMetadata::get(SunkAddr)));
+ return true;
+ }
+ return false;
+}
+
+// A llvm.dbg.value may be using a value before its definition, due to
+// optimizations in this pass and others. Scan for such dbg.values, and rescue
+// them by moving the dbg.value to immediately after the value definition.
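+// Illustrative example: a use-before-def such as
+//   call void @llvm.dbg.value(metadata i32 %x, ...)
+//   %x = add i32 %a, %b
+// is rewritten so the intrinsic follows the definition of %x.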
+// FIXME: Ideally this should never be necessary, and this has the potential
+// to re-order dbg.value intrinsics.
bool CodeGenPrepare::placeDbgValues(Function &F) {
bool MadeChange = false;
+ DominatorTree DT(F);
+
for (BasicBlock &BB : F) {
- Instruction *PrevNonDbgInst = nullptr;
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
Instruction *Insn = &*BI++;
DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
- // Leave dbg.values that refer to an alloca alone. These
- // intrinsics describe the address of a variable (= the alloca)
- // being taken. They should not be moved next to the alloca
- // (and to the beginning of the scope), but rather stay close to
- // where said address is used.
- if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
- PrevNonDbgInst = Insn;
+ if (!DVI)
continue;
- }
Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
- if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
- // If VI is a phi in a block with an EHPad terminator, we can't insert
- // after it.
- if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
- continue;
- LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
- << *DVI << ' ' << *VI);
- DVI->removeFromParent();
- if (isa<PHINode>(VI))
- DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
- else
- DVI->insertAfter(VI);
- MadeChange = true;
- ++NumDbgValueMoved;
- }
+
+ if (!VI || VI->isTerminator())
+ continue;
+
+ // If VI is a phi in a block with an EHPad terminator, we can't insert
+ // after it.
+ if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
+ continue;
+
+ // If the defining instruction dominates the dbg.value, we do not need
+ // to move the dbg.value.
+ if (DT.dominates(VI, DVI))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
+ << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
+ else
+ DVI->insertAfter(VI);
+ MadeChange = true;
+ ++NumDbgValueMoved;
}
}
return MadeChange;
@@ -7201,6 +7345,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
if (Br1->getMetadata(LLVMContext::MD_unpredictable))
continue;
+ // The merging of mostly-empty BBs can cause a degenerate branch.
+ if (TBB == FBB)
+ continue;
+
unsigned Opc;
Value *Cond1, *Cond2;
if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 702e7e244bce..8d9d48402b31 100644
--- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -261,15 +261,25 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isRegMask())
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
- if (MO.clobbersPhysReg(i)) {
+ if (MO.isRegMask()) {
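+ // Only treat PhysReg as clobbered if the mask clobbers it and every one
+ // of its sub-registers (the iterator below includes PhysReg itself).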
+ auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) {
+ for (MCSubRegIterator SRI(PhysReg, TRI, true); SRI.isValid(); ++SRI)
+ if (!MO.clobbersPhysReg(*SRI))
+ return false;
+
+ return true;
+ };
+
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ if (ClobbersPhysRegAndSubRegs(i)) {
DefIndices[i] = Count;
KillIndices[i] = ~0u;
KeepRegs.reset(i);
Classes[i] = nullptr;
RegRefs.erase(i);
}
+ }
+ }
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp
index a169c3cb16b2..afcf014bca40 100644
--- a/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -52,68 +52,22 @@ static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
static unsigned InstrCount = 0;
-// --------------------------------------------------------------------
-// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
-
-static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
- return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
-}
-
-/// Return the DFAInput for an instruction class input vector.
-/// This function is used in both DFAPacketizer.cpp and in
-/// DFAPacketizerEmitter.cpp.
-static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
- DFAInput InsnInput = 0;
- assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
- "Exceeded maximum number of DFA terms");
- for (auto U : InsnClass)
- InsnInput = addDFAFuncUnits(InsnInput, U);
- return InsnInput;
-}
-
-// --------------------------------------------------------------------
-
-// Make sure DFA types are large enough for the number of terms & resources.
-static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
- (8 * sizeof(DFAInput)),
- "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
-static_assert(
- (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
- "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
-
-// Return the DFAInput for an instruction class.
-DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
- // Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
- DFAInput InsnInput = 0;
- unsigned i = 0;
- (void)i;
- for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
- *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) {
- InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
- assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
- }
- return InsnInput;
-}
-
-// Return the DFAInput for an instruction class input vector.
-DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
- return getDFAInsnInput(InsnClass);
-}
-
// Check if the resources occupied by a MCInstrDesc are available in the
// current state.
bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
- unsigned InsnClass = MID->getSchedClass();
- DFAInput InsnInput = getInsnInput(InsnClass);
- return A.canAdd(InsnInput);
+ unsigned Action = ItinActions[MID->getSchedClass()];
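+ // A sched class of zero or a zero action means the itinerary defines no
+ // DFA transition for this instruction; conservatively report that its
+ // resources cannot be reserved.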
+ if (MID->getSchedClass() == 0 || Action == 0)
+ return false;
+ return A.canAdd(Action);
}
// Reserve the resources occupied by a MCInstrDesc and change the current
// state to reflect that change.
void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
- unsigned InsnClass = MID->getSchedClass();
- DFAInput InsnInput = getInsnInput(InsnClass);
- A.add(InsnInput);
+ unsigned Action = ItinActions[MID->getSchedClass()];
+ if (MID->getSchedClass() == 0 || Action == 0)
+ return;
+ A.add(Action);
}
// Check if the resources occupied by a machine instruction are available
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 9a537c859a67..d1529b08f708 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -81,6 +82,15 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
return false;
} else {
+ if (MO.isDead()) {
+#ifndef NDEBUG
+ // Sanity check on uses of this dead register. All of them should be
+ // 'undef'.
+ for (auto &U : MRI->use_nodbg_operands(Reg))
+ assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
+#endif
+ continue;
+ }
for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
if (&Use != MI)
// This def has a non-debug use. Don't delete the instruction!
diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index ddd6cec5a178..af347fd7e73d 100644
--- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -17,7 +17,6 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -30,9 +29,11 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
using namespace llvm;
diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index e5694218b5c3..d45e424184d7 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -940,6 +941,7 @@ class EarlyIfPredicator : public MachineFunctionPass {
TargetSchedModel SchedModel;
MachineRegisterInfo *MRI;
MachineDominatorTree *DomTree;
+ MachineBranchProbabilityInfo *MBPI;
MachineLoopInfo *Loops;
SSAIfConv IfConv;
@@ -965,10 +967,12 @@ char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID;
INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator",
false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false,
false)
void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -978,6 +982,7 @@ void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const {
/// Apply the target heuristic to decide if the transformation is profitable.
bool EarlyIfPredicator::shouldConvertIf() {
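+ // Query the real probability of taking the true edge, rather than passing
+ // an unknown probability to the target's profitability hook.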
+ auto TrueProbability = MBPI->getEdgeProbability(IfConv.Head, IfConv.TBB);
if (IfConv.isTriangle()) {
MachineBasicBlock &IfBlock =
(IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB;
@@ -992,7 +997,7 @@ bool EarlyIfPredicator::shouldConvertIf() {
}
return TII->isProfitableToIfCvt(IfBlock, Cycles, ExtraPredCost,
- BranchProbability::getUnknown());
+ TrueProbability);
}
unsigned TExtra = 0;
unsigned FExtra = 0;
@@ -1011,8 +1016,7 @@ bool EarlyIfPredicator::shouldConvertIf() {
FExtra += TII->getPredicationCost(I);
}
return TII->isProfitableToIfCvt(*IfConv.TBB, TCycle, TExtra, *IfConv.FBB,
- FCycle, FExtra,
- BranchProbability::getUnknown());
+ FCycle, FExtra, TrueProbability);
}
/// Attempt repeated if-conversion on MBB, return true if successful.
@@ -1043,6 +1047,7 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) {
SchedModel.init(&STI);
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
bool Changed = false;
IfConv.runOnMachineFunction(MF);
diff --git a/llvm/lib/CodeGen/EdgeBundles.cpp b/llvm/lib/CodeGen/EdgeBundles.cpp
index 486720cadd27..dfaf7f584652 100644
--- a/llvm/lib/CodeGen/EdgeBundles.cpp
+++ b/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 9916f2de0414..a1adf4ef9820 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -13,6 +13,8 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -20,6 +22,8 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
using namespace llvm;
@@ -264,9 +268,9 @@ Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,
uint64_t OffsetBytes) {
if (OffsetBytes > 0) {
auto *ByteType = Type::getInt8Ty(CI->getContext());
- Source = Builder.CreateGEP(
+ Source = Builder.CreateConstGEP1_64(
ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),
- ConstantInt::get(ByteType, OffsetBytes));
+ OffsetBytes);
}
return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());
}
@@ -720,7 +724,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() {
/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
/// ret i32 %phi.res
static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
- const TargetLowering *TLI, const DataLayout *DL) {
+ const TargetLowering *TLI, const DataLayout *DL,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
@@ -741,18 +746,20 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
- auto Options = TTI->enableMemCmpExpansion(CI->getFunction()->hasOptSize(),
+ bool OptForSize = CI->getFunction()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+ auto Options = TTI->enableMemCmpExpansion(OptForSize,
IsUsedForZeroCmp);
if (!Options) return false;
if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
- if (CI->getFunction()->hasOptSize() &&
+ if (OptForSize &&
MaxLoadsPerMemcmpOptSize.getNumOccurrences())
Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
- if (!CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmp.getNumOccurrences())
+ if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
Options.MaxNumLoads = MaxLoadsPerMemcmp;
MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);
@@ -798,7 +805,11 @@ public:
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
const TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto PA = runImpl(F, TLI, TTI, TL);
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
+ auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI);
return !PA.areAllPreserved();
}
@@ -806,22 +817,26 @@ private:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
FunctionPass::getAnalysisUsage(AU);
}
PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
- const TargetLowering* TL);
+ const TargetLowering* TL,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI);
// Returns true if a change was made.
bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, const TargetLowering* TL,
- const DataLayout& DL);
+ const DataLayout& DL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI);
};
bool ExpandMemCmpPass::runOnBlock(
BasicBlock &BB, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, const TargetLowering* TL,
- const DataLayout& DL) {
+ const DataLayout& DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
for (Instruction& I : BB) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI) {
@@ -830,7 +845,7 @@ bool ExpandMemCmpPass::runOnBlock(
LibFunc Func;
if (TLI->getLibFunc(ImmutableCallSite(CI), Func) &&
(Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
- expandMemCmp(CI, TTI, TL, &DL)) {
+ expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) {
return true;
}
}
@@ -840,11 +855,12 @@ bool ExpandMemCmpPass::runOnBlock(
PreservedAnalyses ExpandMemCmpPass::runImpl(
Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI,
- const TargetLowering* TL) {
+ const TargetLowering* TL, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
const DataLayout& DL = F.getParent()->getDataLayout();
bool MadeChanges = false;
for (auto BBIt = F.begin(); BBIt != F.end();) {
- if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) {
+ if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) {
MadeChanges = true;
// If changes were made, restart the function from the beginning, since
// the structure of the function was changed.
@@ -863,6 +879,8 @@ INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
"Expand memcmp() to load/stores", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
"Expand memcmp() to load/stores", false, false)
diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index 1fc57fac1489..842211c09134 100644
--- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index 1069a2423b8b..4ccf1d2c8c50 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -78,14 +79,32 @@ RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
bool Changed = false;
SmallVector<IntrinsicInst *, 4> Worklist;
- for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
- if (auto II = dyn_cast<IntrinsicInst>(&*I))
- Worklist.push_back(II);
+ for (auto &I : instructions(F)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
+ case Intrinsic::experimental_vector_reduce_add:
+ case Intrinsic::experimental_vector_reduce_mul:
+ case Intrinsic::experimental_vector_reduce_and:
+ case Intrinsic::experimental_vector_reduce_or:
+ case Intrinsic::experimental_vector_reduce_xor:
+ case Intrinsic::experimental_vector_reduce_smax:
+ case Intrinsic::experimental_vector_reduce_smin:
+ case Intrinsic::experimental_vector_reduce_umax:
+ case Intrinsic::experimental_vector_reduce_umin:
+ case Intrinsic::experimental_vector_reduce_fmax:
+ case Intrinsic::experimental_vector_reduce_fmin:
+ if (TTI->shouldExpandReduction(II))
+ Worklist.push_back(II);
- for (auto *II : Worklist) {
- if (!TTI->shouldExpandReduction(II))
- continue;
+ break;
+ }
+ }
+ }
+ for (auto *II : Worklist) {
FastMathFlags FMF =
isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
Intrinsic::ID ID = II->getIntrinsicID();
@@ -96,6 +115,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
Builder.setFastMathFlags(FMF);
switch (ID) {
+ default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::experimental_vector_reduce_v2_fadd:
case Intrinsic::experimental_vector_reduce_v2_fmul: {
// FMFs must be attached to the call, otherwise it's an ordered reduction
@@ -105,11 +125,15 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
if (!FMF.allowReassoc())
Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
else {
+ if (!isPowerOf2_32(Vec->getType()->getVectorNumElements()))
+ continue;
+
Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
Acc, Rdx, "bin.rdx");
}
- } break;
+ break;
+ }
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
case Intrinsic::experimental_vector_reduce_and:
@@ -122,10 +146,12 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
case Intrinsic::experimental_vector_reduce_fmax:
case Intrinsic::experimental_vector_reduce_fmin: {
Value *Vec = II->getArgOperand(0);
+ if (!isPowerOf2_32(Vec->getType()->getVectorNumElements()))
+ continue;
+
Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
- } break;
- default:
- continue;
+ break;
+ }
}
II->replaceAllUsesWith(Rdx);
II->eraseFromParent();
diff --git a/llvm/lib/CodeGen/FEntryInserter.cpp b/llvm/lib/CodeGen/FEntryInserter.cpp
index a122f490884e..4c0f30bce820 100644
--- a/llvm/lib/CodeGen/FEntryInserter.cpp
+++ b/llvm/lib/CodeGen/FEntryInserter.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/FaultMaps.cpp b/llvm/lib/CodeGen/FaultMaps.cpp
index 600f72d320eb..de0b4fa87098 100644
--- a/llvm/lib/CodeGen/FaultMaps.cpp
+++ b/llvm/lib/CodeGen/FaultMaps.cpp
@@ -28,11 +28,9 @@ const char *FaultMaps::WFMP = "Fault Maps: ";
FaultMaps::FaultMaps(AsmPrinter &AP) : AP(AP) {}
void FaultMaps::recordFaultingOp(FaultKind FaultTy,
+ const MCSymbol *FaultingLabel,
const MCSymbol *HandlerLabel) {
MCContext &OutContext = AP.OutStreamer->getContext();
- MCSymbol *FaultingLabel = OutContext.createTempSymbol();
-
- AP.OutStreamer->EmitLabel(FaultingLabel);
const MCExpr *FaultingOffset = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(FaultingLabel, OutContext),
diff --git a/llvm/lib/CodeGen/FinalizeISel.cpp b/llvm/lib/CodeGen/FinalizeISel.cpp
index 772d7f71bb37..00040e92a829 100644
--- a/llvm/lib/CodeGen/FinalizeISel.cpp
+++ b/llvm/lib/CodeGen/FinalizeISel.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/FuncletLayout.cpp b/llvm/lib/CodeGen/FuncletLayout.cpp
index 75f6d0b8f0bf..f1222a88b054 100644
--- a/llvm/lib/CodeGen/FuncletLayout.cpp
+++ b/llvm/lib/CodeGen/FuncletLayout.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "funclet-layout"
diff --git a/llvm/lib/CodeGen/GCMetadata.cpp b/llvm/lib/CodeGen/GCMetadata.cpp
index c1d22ef89195..600d662e0f99 100644
--- a/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/llvm/lib/CodeGen/GCMetadata.cpp
@@ -10,11 +10,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp
index 0dc0a5bce747..90e5f32f53b3 100644
--- a/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -24,6 +24,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 7d9d812d34bc..e6abfcdb92cb 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -10,11 +10,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#define DEBUG_TYPE "cseinfo"
using namespace llvm;
char llvm::GISelCSEAnalysisWrapperPass::ID = 0;
+GISelCSEAnalysisWrapperPass::GISelCSEAnalysisWrapperPass()
+ : MachineFunctionPass(ID) {
+ initializeGISelCSEAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());
+}
INITIALIZE_PASS_BEGIN(GISelCSEAnalysisWrapperPass, DEBUG_TYPE,
"Analysis containing CSE Info", false, true)
INITIALIZE_PASS_END(GISelCSEAnalysisWrapperPass, DEBUG_TYPE,
@@ -52,7 +57,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_UNMERGE_VALUES:
case TargetOpcode::G_TRUNC:
- case TargetOpcode::G_GEP:
+ case TargetOpcode::G_PTR_ADD:
return true;
}
return false;
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index cdad92f7db4f..4c2dbdd905f3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -65,7 +65,11 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
Info.SwiftErrorVReg = SwiftErrorVReg;
Info.IsMustTailCall = CS.isMustTailCall();
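+ // Only treat this as a tail call if it is in tail-call position and the
+ // caller does not disable tail calls via the "disable-tail-calls" attribute.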
Info.IsTailCall = CS.isTailCall() &&
- isInTailCallPosition(CS, MIRBuilder.getMF().getTarget());
+ isInTailCallPosition(CS, MIRBuilder.getMF().getTarget()) &&
+ (MIRBuilder.getMF()
+ .getFunction()
+ .getFnAttribute("disable-tail-calls")
+ .getValueAsString() != "true");
Info.IsVarArg = CS.getFunctionType()->isVarArg();
return lowerCall(MIRBuilder, Info);
}
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 854769d283f7..a103e8e4e6e0 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -74,12 +74,35 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
return false;
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
+
+ // Give up if either DstReg or SrcReg is a physical register.
+ if (Register::isPhysicalRegister(DstReg) ||
+ Register::isPhysicalRegister(SrcReg))
+ return false;
+
+ // Next, check that the source and destination types are compatible.
LLT DstTy = MRI.getType(DstReg);
LLT SrcTy = MRI.getType(SrcReg);
- // Simple Copy Propagation.
- // a(sx) = COPY b(sx) -> Replace all uses of a with b.
- if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy)
+ // Give up if one has a valid LLT, but the other doesn't.
+ if (DstTy.isValid() != SrcTy.isValid())
+ return false;
+ // Give up if the types don't match.
+ if (DstTy.isValid() && SrcTy.isValid() && DstTy != SrcTy)
+ return false;
+
+ // Get the register banks and classes.
+ const RegisterBank *DstBank = MRI.getRegBankOrNull(DstReg);
+ const RegisterBank *SrcBank = MRI.getRegBankOrNull(SrcReg);
+ const TargetRegisterClass *DstRC = MRI.getRegClassOrNull(DstReg);
+ const TargetRegisterClass *SrcRC = MRI.getRegClassOrNull(SrcReg);
+
+ // Replace if the register constraints match.
+ if ((SrcRC == DstRC) && (SrcBank == DstBank))
+ return true;
+ // Replace if DstReg has no constraints.
+ if (!DstBank && !DstRC)
return true;
+
return false;
}
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
@@ -109,10 +132,7 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
// Walk over all the operands of concat vectors and check if they are
// build_vector themselves or undef.
// Then collect their operands in Ops.
- for (const MachineOperand &MO : MI.operands()) {
- // Skip the instruction definition.
- if (MO.isDef())
- continue;
+ for (const MachineOperand &MO : MI.uses()) {
Register Reg = MO.getReg();
MachineInstr *Def = MRI.getVRegDef(Reg);
assert(Def && "Operand not defined");
@@ -121,12 +141,8 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
IsUndef = false;
// Remember the operands of the build_vector to fold
// them into the yet-to-build flattened concat vectors.
- for (const MachineOperand &BuildVecMO : Def->operands()) {
- // Skip the definition.
- if (BuildVecMO.isDef())
- continue;
+ for (const MachineOperand &BuildVecMO : Def->uses())
Ops.push_back(BuildVecMO.getReg());
- }
break;
case TargetOpcode::G_IMPLICIT_DEF: {
LLT OpType = MRI.getType(Reg);
@@ -189,8 +205,11 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
Register Src1 = MI.getOperand(1).getReg();
LLT SrcType = MRI.getType(Src1);
- unsigned DstNumElts = DstType.getNumElements();
- unsigned SrcNumElts = SrcType.getNumElements();
+ // As bizarre as it may look, a shuffle vector can actually produce a
+ // scalar! This is because at the IR level a <1 x ty> shuffle
+ // vector is perfectly valid.
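+ // For example, IR such as
+ //   %r = shufflevector <1 x i32> %a, <1 x i32> %b, <1 x i32> zeroinitializer
+ // becomes a G_SHUFFLE_VECTOR whose destination LLT is the scalar s32.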
+ unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
+ unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
// If the resulting vector is smaller than the size of the source
// vectors being concatenated, we won't be able to replace the
@@ -199,7 +218,15 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
// Note: We may still be able to produce a concat_vectors fed by
// extract_vector_elt and so on. It is less clear that would
// be better though, so don't bother for now.
- if (DstNumElts < 2 * SrcNumElts)
+ //
+ // If the destination is a scalar, the size of the sources doesn't
+ // matter; we will lower the shuffle to a plain copy. This works
+ // only if the source and destination have the same size, but
+ // that's covered by the next condition.
+ //
+ // TODO: If the sizes of the source and destination don't match,
+ // we could still emit an extract vector element in that case.
+ if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
return false;
// Check that the shuffle mask can be broken evenly between the
@@ -212,8 +239,7 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
// vectors.
unsigned NumConcat = DstNumElts / SrcNumElts;
SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
- SmallVector<int, 8> Mask;
- ShuffleVectorInst::getShuffleMask(MI.getOperand(3).getShuffleMask(), Mask);
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
for (unsigned i = 0; i != DstNumElts; ++i) {
int Idx = Mask[i];
// Undef value.
@@ -254,7 +280,10 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
Builder.setInsertPt(*MI.getParent(), MI);
Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
- Builder.buildConcatVectors(NewDstReg, Ops);
+ if (Ops.size() == 1)
+ Builder.buildCopy(NewDstReg, Ops[0]);
+ else
+ Builder.buildMerge(NewDstReg, Ops);
MI.eraseFromParent();
replaceRegWith(MRI, DstReg, NewDstReg);
@@ -571,7 +600,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
for (auto &Use : MRI.use_instructions(Base)) {
- if (Use.getOpcode() != TargetOpcode::G_GEP)
+ if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
continue;
Offset = Use.getOperand(2).getReg();
@@ -597,8 +626,8 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
// forming an indexed one.
bool MemOpDominatesAddrUses = true;
- for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
- if (!dominates(MI, GEPUse)) {
+ for (auto &PtrAddUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
+ if (!dominates(MI, PtrAddUse)) {
MemOpDominatesAddrUses = false;
break;
}
@@ -631,7 +660,7 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
#endif
Addr = MI.getOperand(1).getReg();
- MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI);
+ MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
if (!AddrDef || MRI.hasOneUse(Addr))
return false;
@@ -667,8 +696,8 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
}
}
- // FIXME: check whether all uses of the base pointer are constant GEPs. That
- // might allow us to end base's liveness here by adjusting the constant.
+ // FIXME: check whether all uses of the base pointer are constant PtrAdds.
+ // That might allow us to end base's liveness here by adjusting the constant.
for (auto &UseMI : MRI.use_instructions(Addr)) {
if (!dominates(MI, UseMI)) {
@@ -681,18 +710,36 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
}
bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
+ IndexedLoadStoreMatchInfo MatchInfo;
+ if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
+ applyCombineIndexedLoadStore(MI, MatchInfo);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::matchCombineIndexedLoadStore(
+ MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
unsigned Opcode = MI.getOpcode();
if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
return false;
- bool IsStore = Opcode == TargetOpcode::G_STORE;
- Register Addr, Base, Offset;
- bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset);
- if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset))
+ MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
+ MatchInfo.Offset);
+ if (!MatchInfo.IsPre &&
+ !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
+ MatchInfo.Offset))
return false;
+ return true;
+}
+void CombinerHelper::applyCombineIndexedLoadStore(
+ MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
+ MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
+ MachineIRBuilder MIRBuilder(MI);
+ unsigned Opcode = MI.getOpcode();
+ bool IsStore = Opcode == TargetOpcode::G_STORE;
unsigned NewOpcode;
switch (Opcode) {
case TargetOpcode::G_LOAD:
@@ -711,25 +758,22 @@ bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
llvm_unreachable("Unknown load/store opcode");
}
- MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr);
- MachineIRBuilder MIRBuilder(MI);
auto MIB = MIRBuilder.buildInstr(NewOpcode);
if (IsStore) {
- MIB.addDef(Addr);
+ MIB.addDef(MatchInfo.Addr);
MIB.addUse(MI.getOperand(0).getReg());
} else {
MIB.addDef(MI.getOperand(0).getReg());
- MIB.addDef(Addr);
+ MIB.addDef(MatchInfo.Addr);
}
- MIB.addUse(Base);
- MIB.addUse(Offset);
- MIB.addImm(IsPre);
+ MIB.addUse(MatchInfo.Base);
+ MIB.addUse(MatchInfo.Offset);
+ MIB.addImm(MatchInfo.IsPre);
MI.eraseFromParent();
AddrDef.eraseFromParent();
LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
- return true;
}
bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
@@ -1016,7 +1060,7 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val
if (DstOff != 0) {
auto Offset =
MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
- Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
+ Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
}
MIB.buildStore(Value, Ptr, *StoreMMO);
@@ -1121,13 +1165,13 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
if (CurrOffset != 0) {
Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
.getReg(0);
- LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
+ LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
}
auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
// Create the store.
Register StorePtr =
- CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
+ CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
MIB.buildStore(LdVal, StorePtr, *StoreMMO);
CurrOffset += CopyTy.getSizeInBytes();
Size -= CopyTy.getSizeInBytes();
@@ -1218,7 +1262,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
if (CurrOffset != 0) {
auto Offset =
MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
+ LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
}
LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
CurrOffset += CopyTy.getSizeInBytes();
@@ -1235,7 +1279,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
if (CurrOffset != 0) {
auto Offset =
MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
+ StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
}
MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
CurrOffset += CopyTy.getSizeInBytes();
@@ -1295,6 +1339,52 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
return false;
}
+bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
+ PtrAddChain &MatchInfo) {
+ // We're trying to match the following pattern:
+ // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
+ // %root = G_PTR_ADD %t1, G_CONSTANT imm2
+ // -->
+ // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
+
+ if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
+ return false;
+
+ Register Add2 = MI.getOperand(1).getReg();
+ Register Imm1 = MI.getOperand(2).getReg();
+ auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ if (!MaybeImmVal)
+ return false;
+
+ MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
+ if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
+ return false;
+
+ Register Base = Add2Def->getOperand(1).getReg();
+ Register Imm2 = Add2Def->getOperand(2).getReg();
+ auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ if (!MaybeImm2Val)
+ return false;
+
+ // Pass the combined immediate to the apply function.
+ MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
+ MatchInfo.Base = Base;
+ return true;
+}
+
+bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
+ PtrAddChain &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
+ MachineIRBuilder MIB(MI);
+ LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
+ auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(MatchInfo.Base);
+ MI.getOperand(2).setReg(NewOffset.getReg(0));
+ Observer.changedInstr(MI);
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index be8efa8795f3..64023ecfad82 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -179,8 +179,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known.Zero = KnownZeroOut;
break;
}
- case TargetOpcode::G_GEP: {
- // G_GEP is like G_ADD. FIXME: Is this true for all targets?
+ case TargetOpcode::G_PTR_ADD: {
+ // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
LLT Ty = MRI.getType(MI.getOperand(1).getReg());
if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
break;
@@ -373,6 +373,76 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
<< Known.One.toString(16, false) << "\n");
}
+unsigned GISelKnownBits::computeNumSignBits(Register R,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ MachineInstr &MI = *MRI.getVRegDef(R);
+ unsigned Opcode = MI.getOpcode();
+
+ if (Opcode == TargetOpcode::G_CONSTANT)
+ return MI.getOperand(1).getCImm()->getValue().getNumSignBits();
+
+ if (Depth == getMaxDepth())
+ return 1;
+
+ if (!DemandedElts)
+ return 1; // No demanded elts, better to assume we don't know anything.
+
+ LLT DstTy = MRI.getType(R);
+
+ // Handle the case where this is called on a register that does not have a
+ // type constraint. This is unlikely to occur except by looking through copies
+ // but it is possible for the initial register being queried to be in this
+ // state.
+ if (!DstTy.isValid())
+ return 1;
+
+ switch (Opcode) {
+ case TargetOpcode::COPY: {
+ MachineOperand &Src = MI.getOperand(1);
+ if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
+ MRI.getType(Src.getReg()).isValid()) {
+ // Don't increment Depth for this one since we didn't do any work.
+ return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
+ }
+
+ return 1;
+ }
+ case TargetOpcode::G_SEXT: {
+ Register Src = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(Src);
+ unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
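+ // E.g. a G_SEXT from s8 to s32 contributes 24 extra sign bits on top of
+ // those already present in the source.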
+ return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
+ }
+ case TargetOpcode::G_TRUNC: {
+ Register Src = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(Src);
+
+ // Check if the sign bits of source go down as far as the truncated value.
+ unsigned DstTyBits = DstTy.getScalarSizeInBits();
+ unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
+ unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
+ if (NumSrcSignBits > (NumSrcBits - DstTyBits))
+ return NumSrcSignBits - (NumSrcBits - DstTyBits);
+ break;
+ }
+ default:
+ break;
+ }
+
+ // TODO: Handle target instructions
+ // TODO: Fall back to known bits
+ return 1;
+}
+
+unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
+ LLT Ty = MRI.getType(R);
+ APInt DemandedElts = Ty.isVector()
+ ? APInt::getAllOnesValue(Ty.getNumElements())
+ : APInt(1, 1);
+ return computeNumSignBits(R, DemandedElts, Depth);
+}
+
void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 45cef4aca888..17eca2b0301c 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -56,6 +56,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -224,12 +225,12 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
if (FrameIndices.find(&AI) != FrameIndices.end())
return FrameIndices[&AI];
- unsigned ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
- unsigned Size =
+ uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
+ uint64_t Size =
ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
// Always allocate at least one byte.
- Size = std::max(Size, 1u);
+ Size = std::max<uint64_t>(Size, 1u);
unsigned Alignment = AI.getAlignment();
if (!Alignment)
@@ -466,7 +467,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
return true;
}
- SL->findJumpTables(Clusters, &SI, DefaultMBB);
+ SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
LLVM_DEBUG({
dbgs() << "Case clusters: ";
@@ -885,13 +886,15 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
for (unsigned i = 0; i < Regs.size(); ++i) {
Register Addr;
- MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
+ MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
unsigned BaseAlign = getMemOpAlignment(LI);
+ AAMDNodes AAMetadata;
+ LI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8,
- MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), Ranges,
+ MinAlign(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges,
LI.getSyncScopeID(), LI.getOrdering());
MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
}
@@ -926,13 +929,15 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
for (unsigned i = 0; i < Vals.size(); ++i) {
Register Addr;
- MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
+ MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
unsigned BaseAlign = getMemOpAlignment(SI);
+ AAMDNodes AAMetadata;
+ SI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8,
- MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
+ MinAlign(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr,
SI.getSyncScopeID(), SI.getOrdering());
MIRBuilder.buildStore(Vals[i], Addr, *MMO);
}
@@ -1080,8 +1085,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
if (Offset != 0) {
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
- BaseReg =
- MIRBuilder.buildGEP(PtrTy, BaseReg, OffsetMIB.getReg(0)).getReg(0);
+ BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0))
+ .getReg(0);
Offset = 0;
}
@@ -1100,14 +1105,14 @@ bool IRTranslator::translateGetElementPtr(const User &U,
} else
GepOffsetReg = IdxReg;
- BaseReg = MIRBuilder.buildGEP(PtrTy, BaseReg, GepOffsetReg).getReg(0);
+ BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0);
}
}
if (Offset != 0) {
auto OffsetMIB =
MIRBuilder.buildConstant(getLLTForType(*OffsetIRTy, *DL), Offset);
- MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
+ MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
return true;
}
@@ -1251,6 +1256,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FSQRT;
case Intrinsic::trunc:
return TargetOpcode::G_INTRINSIC_TRUNC;
+ case Intrinsic::readcyclecounter:
+ return TargetOpcode::G_READCYCLECOUNTER;
}
return Intrinsic::not_intrinsic;
}
@@ -1412,7 +1419,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
+ TLI.isFMAFasterThanFMulAndFAdd(*MF,
+ TLI.getValueType(*DL, CI.getType()))) {
// TODO: Revisit this to see if we should move this part of the
// lowering to the combiner.
MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2},
@@ -1518,6 +1526,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::sideeffect:
// Discard annotate attributes, assumptions, and artificial side-effects.
return true;
+ case Intrinsic::read_register: {
+ Value *Arg = CI.getArgOperand(0);
+ MIRBuilder.buildInstr(TargetOpcode::G_READ_REGISTER)
+ .addDef(getOrCreateVReg(CI))
+ .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()));
+ return true;
+ }
}
return false;
}
@@ -1587,7 +1602,13 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
const Function *F = CI.getCalledFunction();
// FIXME: support Windows dllimport function calls.
- if (F && F->hasDLLImportStorageClass())
+ if (F && (F->hasDLLImportStorageClass() ||
+ (MF->getTarget().getTargetTriple().isOSWindows() &&
+ F->hasExternalWeakLinkage())))
+ return false;
+
+ // FIXME: support control flow guard targets.
+ if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
return false;
if (CI.isInlineAsm())
@@ -1683,6 +1704,10 @@ bool IRTranslator::translateInvoke(const User &U,
if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
return false;
+ // FIXME: support control flow guard targets.
+ if (I.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
+ return false;
+
// FIXME: support Windows exception handling.
if (!isa<LandingPadInst>(EHPadBB->front()))
return false;
@@ -1908,11 +1933,14 @@ bool IRTranslator::translateExtractElement(const User &U,
bool IRTranslator::translateShuffleVector(const User &U,
MachineIRBuilder &MIRBuilder) {
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(cast<Constant>(U.getOperand(2)), Mask);
+ ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);
MIRBuilder.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR)
.addDef(getOrCreateVReg(U))
.addUse(getOrCreateVReg(*U.getOperand(0)))
.addUse(getOrCreateVReg(*U.getOperand(1)))
- .addShuffleMask(cast<Constant>(U.getOperand(2)));
+ .addShuffleMask(MaskAlloc);
return true;
}
@@ -1950,11 +1978,14 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
Register Cmp = getOrCreateVReg(*I.getCompareOperand());
Register NewVal = getOrCreateVReg(*I.getNewValOperand());
+ AAMDNodes AAMetadata;
+ I.getAAMetadata(AAMetadata);
+
MIRBuilder.buildAtomicCmpXchgWithSuccess(
OldValRes, SuccessRes, Addr, Cmp, NewVal,
*MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
Flags, DL->getTypeStoreSize(ValType),
- getMemOpAlignment(I), AAMDNodes(), nullptr,
+ getMemOpAlignment(I), AAMetadata, nullptr,
I.getSyncScopeID(), I.getSuccessOrdering(),
I.getFailureOrdering()));
return true;
@@ -2019,12 +2050,15 @@ bool IRTranslator::translateAtomicRMW(const User &U,
break;
}
+ AAMDNodes AAMetadata;
+ I.getAAMetadata(AAMetadata);
+
MIRBuilder.buildAtomicRMW(
Opcode, Res, Addr, Val,
*MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
Flags, DL->getTypeStoreSize(ResType),
- getMemOpAlignment(I), AAMDNodes(), nullptr,
- I.getSyncScopeID(), I.getOrdering()));
+ getMemOpAlignment(I), AAMetadata,
+ nullptr, I.getSyncScopeID(), I.getOrdering()));
return true;
}
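
The two-line getAAMetadata pattern used in both atomic cases is the general way IR alias information (TBAA, alias.scope, noalias) gets forwarded onto a MachineMemOperand. A hedged sketch for an ordinary load, with Size and Alignment standing in for values computed elsewhere:

    AAMDNodes AAInfo;
    LI.getAAMetadata(AAInfo); // fills TBAA/scope/noalias nodes if attached
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo(LI.getPointerOperand()), MachineMemOperand::MOLoad,
        Size, Alignment, AAInfo);
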
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 28143b30d4e8..b9c90e69ddb2 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -60,7 +60,7 @@ bool InstructionSelector::isBaseWithConstantOffset(
return false;
MachineInstr *RootI = MRI.getVRegDef(Root.getReg());
- if (RootI->getOpcode() != TargetOpcode::G_GEP)
+ if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
return false;
MachineOperand &RHS = RootI->getOperand(2);
diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 1593e21fe07e..e789e4a333dc 100644
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
@@ -139,22 +140,13 @@ public:
};
} // namespace
-bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
- // If the ISel pipeline failed, do not bother running that pass.
- if (MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::FailedISel))
- return false;
- LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
- init(MF);
- const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
- GISelCSEAnalysisWrapper &Wrapper =
- getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
- MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
-
- const size_t NumBlocks = MF.size();
+Legalizer::MFResult
+Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
+ ArrayRef<GISelChangeObserver *> AuxObservers,
+ MachineIRBuilder &MIRBuilder) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- // Populate Insts
+ // Populate worklists.
InstListTy InstList;
ArtifactListTy ArtifactList;
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
@@ -177,48 +169,33 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
}
ArtifactList.finalize();
InstList.finalize();
- std::unique_ptr<MachineIRBuilder> MIRBuilder;
- GISelCSEInfo *CSEInfo = nullptr;
- bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences()
- ? EnableCSEInLegalizer
- : TPC.isGISelCSEEnabled();
- if (EnableCSE) {
- MIRBuilder = std::make_unique<CSEMIRBuilder>();
- CSEInfo = &Wrapper.get(TPC.getCSEConfig());
- MIRBuilder->setCSEInfo(CSEInfo);
- } else
- MIRBuilder = std::make_unique<MachineIRBuilder>();
- // This observer keeps the worklist updated.
+ // This observer keeps the worklists updated.
LegalizerWorkListManager WorkListObserver(InstList, ArtifactList);
- // We want both WorkListObserver as well as CSEInfo to observe all changes.
- // Use the wrapper observer.
+ // We want both WorkListObserver as well as all the auxiliary observers (e.g.
+ // CSEInfo) to observe all changes. Use the wrapper observer.
GISelObserverWrapper WrapperObserver(&WorkListObserver);
- if (EnableCSE && CSEInfo)
- WrapperObserver.addObserver(CSEInfo);
+ for (GISelChangeObserver *Observer : AuxObservers)
+ WrapperObserver.addObserver(Observer);
+
// Now install the observer as the delegate to MF.
// This will keep all the observers notified about new insertions/deletions.
RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
- LegalizerHelper Helper(MF, WrapperObserver, *MIRBuilder.get());
- const LegalizerInfo &LInfo(Helper.getLegalizerInfo());
- LegalizationArtifactCombiner ArtCombiner(*MIRBuilder.get(), MF.getRegInfo(),
- LInfo);
+ LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder);
+ LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) {
WrapperObserver.erasingInstr(*DeadMI);
};
- auto stopLegalizing = [&](MachineInstr &MI) {
- Helper.MIRBuilder.stopObservingChanges();
- reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
- "unable to legalize instruction", MI);
- };
bool Changed = false;
SmallVector<MachineInstr *, 128> RetryList;
do {
+ LLVM_DEBUG(dbgs() << "=== New Iteration ===\n");
assert(RetryList.empty() && "Expected no instructions in RetryList");
unsigned NumArtifacts = ArtifactList.size();
while (!InstList.empty()) {
MachineInstr &MI = *InstList.pop_back_val();
- assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
+ assert(isPreISelGenericOpcode(MI.getOpcode()) &&
+ "Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
MI.eraseFromParentAndMarkDBGValuesForRemoval();
@@ -234,11 +211,17 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// legalizing InstList may generate artifacts that allow
// ArtifactCombiner to combine them away.
if (isArtifact(MI)) {
+ LLVM_DEBUG(dbgs() << ".. Not legalized, moving to artifacts retry\n");
+          assert(NumArtifacts == 0 &&
+                 "Artifacts are only expected in the instruction list from "
+                 "the second iteration onwards, and every iteration after "
+                 "the first must start with an empty artifact list");
+ (void)NumArtifacts;
RetryList.push_back(&MI);
continue;
}
- stopLegalizing(MI);
- return false;
+ Helper.MIRBuilder.stopObservingChanges();
+ return {Changed, &MI};
}
WorkListObserver.printNewInstrs();
Changed |= Res == LegalizerHelper::Legalized;
@@ -246,18 +229,19 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// Try to combine the instructions in RetryList again if there
// are new artifacts. If not, stop legalizing.
if (!RetryList.empty()) {
- if (ArtifactList.size() > NumArtifacts) {
+ if (!ArtifactList.empty()) {
while (!RetryList.empty())
ArtifactList.insert(RetryList.pop_back_val());
} else {
- MachineInstr *MI = *RetryList.begin();
- stopLegalizing(*MI);
- return false;
+ LLVM_DEBUG(dbgs() << "No new artifacts created, not retrying!\n");
+ Helper.MIRBuilder.stopObservingChanges();
+ return {Changed, RetryList.front()};
}
}
while (!ArtifactList.empty()) {
MachineInstr &MI = *ArtifactList.pop_back_val();
- assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
+ assert(isPreISelGenericOpcode(MI.getOpcode()) &&
+ "Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
LLVM_DEBUG(dbgs() << MI << "Is dead\n");
RemoveDeadInstFromLists(&MI);
@@ -265,6 +249,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
SmallVector<MachineInstr *, 4> DeadInstructions;
+ LLVM_DEBUG(dbgs() << "Trying to combine: " << MI);
if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions,
WrapperObserver)) {
WorkListObserver.printNewInstrs();
@@ -279,13 +264,58 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// If this was not an artifact (that could be combined away), this might
// need special handling. Add it to InstList, so when it's processed
// there, it has to be legal or specially handled.
- else
+ else {
+ LLVM_DEBUG(dbgs() << ".. Not combined, moving to instructions list\n");
InstList.insert(&MI);
+ }
}
} while (!InstList.empty());
+ return {Changed, /*FailedOn*/ nullptr};
+}
+
+bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
+  // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
+ init(MF);
+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ GISelCSEAnalysisWrapper &Wrapper =
+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
+ MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
+
+ const size_t NumBlocks = MF.size();
+
+ std::unique_ptr<MachineIRBuilder> MIRBuilder;
+ GISelCSEInfo *CSEInfo = nullptr;
+ bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences()
+ ? EnableCSEInLegalizer
+ : TPC.isGISelCSEEnabled();
+ if (EnableCSE) {
+ MIRBuilder = std::make_unique<CSEMIRBuilder>();
+ CSEInfo = &Wrapper.get(TPC.getCSEConfig());
+ MIRBuilder->setCSEInfo(CSEInfo);
+ } else
+ MIRBuilder = std::make_unique<MachineIRBuilder>();
+
+ SmallVector<GISelChangeObserver *, 1> AuxObservers;
+ if (EnableCSE && CSEInfo) {
+ // We want CSEInfo in addition to WorkListObserver to observe all changes.
+ AuxObservers.push_back(CSEInfo);
+ }
+
+ const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo();
+ MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, *MIRBuilder);
+
+ if (Result.FailedOn) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
+ "unable to legalize instruction", *Result.FailedOn);
+ return false;
+ }
// For now, don't support the case where new blocks are inserted - we would need to fix the
- // outerloop for that.
+ // outer loop for that.
if (MF.size() != NumBlocks) {
MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure",
MF.getFunction().getSubprogram(),
@@ -294,6 +324,5 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
reportGISelFailure(MF, TPC, MORE, R);
return false;
}
-
- return Changed;
+ return Result.Changed;
}
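
The MFResult returned by legalizeMachineFunction is declared in the header; judging from the uses above, it amounts to the following (a sketch, not the verbatim declaration):

    struct MFResult {
      bool Changed;            // did legalization modify the function?
      MachineInstr *FailedOn;  // first unlegalizable instruction, or nullptr
    };

Splitting the worker out this way lets the failure be reported by the pass wrapper, which owns the TargetPassConfig and remark emitter, instead of deep inside the legalization loop.
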
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 21512e543878..667e1a04dc34 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1075,6 +1075,28 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_BSWAP:
+ case TargetOpcode::G_BITREVERSE: {
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ SmallVector<Register, 2> SrcRegs, DstRegs;
+ unsigned NumParts = SizeOp0 / NarrowSize;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
+ {SrcRegs[NumParts - 1 - i]});
+ DstRegs.push_back(DstPart.getReg(0));
+ }
+
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+
+ Observer.changedInstr(MI);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
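
A self-contained check of the part ordering used in the new narrowing case: byte-swapping each narrow piece and merging the pieces in reverse order swaps every byte of the wide value. Plain C++, nothing LLVM-specific:

    #include <cstdint>
    static constexpr uint32_t bswap32(uint32_t V) {
      return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) |
             (V << 24);
    }
    static constexpr uint64_t bswap64(uint64_t V) {
      uint32_t Lo = static_cast<uint32_t>(V);
      uint32_t Hi = static_cast<uint32_t>(V >> 32);
      // Parts in reverse order: the result's low half comes from Hi.
      return (static_cast<uint64_t>(bswap32(Lo)) << 32) | bswap32(Hi);
    }
    static_assert(bswap64(0x0102030405060708ull) == 0x0807060504030201ull, "");
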
@@ -1675,7 +1697,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_CONSTANT: {
MachineOperand &SrcMO = MI.getOperand(1);
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
- const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
+ unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
+ MRI.getType(MI.getOperand(0).getReg()));
+ assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
+ ExtOpc == TargetOpcode::G_ANYEXT) &&
+ "Illegal Extend");
+ const APInt &SrcVal = SrcMO.getCImm()->getValue();
+ const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
+ ? SrcVal.sext(WideTy.getSizeInBits())
+ : SrcVal.zext(WideTy.getSizeInBits());
Observer.changingInstr(MI);
SrcMO.setCImm(ConstantInt::get(Ctx, Val));
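
The extension choice matters for small signed-looking constants; widening an s1 "true" to 32 bits, for instance (APInt sketch):

    APInt One(1, 1);          // s1 constant true
    APInt Z = One.zext(32);   // 1
    APInt S = One.sext(32);   // 0xFFFFFFFF, i.e. -1

A target whose selector expects zero-extended booleans can return G_ZEXT from the new hook; the default added in LegalizerInfo.cpp below keeps G_SEXT for byte-sized types.
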
@@ -1748,8 +1778,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_GEP:
- assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
+ case TargetOpcode::G_PTR_ADD:
+ assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
@@ -1789,10 +1819,35 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
if (TypeIdx != 2)
return UnableToLegalize;
Observer.changingInstr(MI);
+ // TODO: Probably should be zext
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ if (TypeIdx == 1) {
+ Observer.changingInstr(MI);
+
+ Register VecReg = MI.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
+
+ widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideVecTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (TypeIdx == 2) {
+ Observer.changingInstr(MI);
+ // TODO: Probably should be zext
+ widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ }
+
+ return Legalized;
+ }
case TargetOpcode::G_FADD:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FSUB:
@@ -1998,6 +2053,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
}
case TargetOpcode::G_FMAD:
return lowerFMad(MI);
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ return lowerIntrinsicRound(MI);
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
Register OldValRes = MI.getOperand(0).getReg();
Register SuccessRes = MI.getOperand(1).getReg();
@@ -2058,8 +2115,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
auto OffsetCst =
MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
- Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
*SmallMMO);
@@ -2083,7 +2141,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
default:
llvm_unreachable("Unexpected opcode");
case TargetOpcode::G_LOAD:
- MIRBuilder.buildAnyExt(DstReg, TmpReg);
+ MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
break;
case TargetOpcode::G_SEXTLOAD:
MIRBuilder.buildSExt(DstReg, TmpReg);
@@ -2126,12 +2184,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
- // Generate the GEP and truncating stores.
+ // Generate the PtrAdd and truncating stores.
LLT PtrTy = MRI.getType(PtrReg);
auto OffsetCst =
MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
- Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
- auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
+ Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr =
+ MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
MachineFunction &MF = MIRBuilder.getMF();
MachineMemOperand *LargeMMO =
@@ -2254,6 +2313,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return lowerExtract(MI);
case G_INSERT:
return lowerInsert(MI);
+ case G_BSWAP:
+ return lowerBswap(MI);
+ case G_BITREVERSE:
+ return lowerBitreverse(MI);
+ case G_READ_REGISTER:
+ return lowerReadRegister(MI);
}
}
@@ -2883,7 +2948,7 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
unsigned ByteOffset = Offset / 8;
Register NewAddrReg;
- MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
+ MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
MachineMemOperand *NewMMO =
MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
@@ -2960,6 +3025,9 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_BSWAP:
case G_BITREVERSE:
case G_SDIV:
+ case G_UDIV:
+ case G_SREM:
+ case G_UREM:
case G_SMIN:
case G_SMAX:
case G_UMIN:
@@ -3259,7 +3327,13 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_SMIN:
case TargetOpcode::G_SMAX:
case TargetOpcode::G_UMIN:
- case TargetOpcode::G_UMAX: {
+ case TargetOpcode::G_UMAX:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM: {
Observer.changingInstr(MI);
moreElementsVectorSrc(MI, MoreTy, 1);
moreElementsVectorSrc(MI, MoreTy, 2);
@@ -3352,7 +3426,7 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
Factors.push_back(Umulh.getReg(0));
}
- // Add CarrySum from additons calculated for previous DstIdx.
+ // Add CarrySum from additions calculated for previous DstIdx.
if (DstIdx != 1) {
Factors.push_back(CarrySumPrevDstIdx);
}
@@ -3824,6 +3898,14 @@ LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
LLT DstTy = MRI.getType(Dst);
LLT SrcTy = MRI.getType(Src);
+ if (SrcTy == LLT::scalar(1)) {
+ auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
+ auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
+ MIRBuilder.buildSelect(Dst, Src, True, False);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
if (SrcTy != LLT::scalar(64))
return UnableToLegalize;
@@ -3849,6 +3931,14 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
const LLT S32 = LLT::scalar(32);
const LLT S1 = LLT::scalar(1);
+ if (SrcTy == S1) {
+ auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
+ auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
+ MIRBuilder.buildSelect(Dst, Src, True, False);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
if (SrcTy != S64)
return UnableToLegalize;
@@ -3910,8 +4000,10 @@ LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
+ const LLT S1 = LLT::scalar(1);
+
MachineInstrBuilder FCMP =
- MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold);
+ MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
MI.eraseFromParent();
@@ -4042,6 +4134,33 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ unsigned Flags = MI.getFlags();
+ LLT Ty = MRI.getType(DstReg);
+ const LLT CondTy = Ty.changeElementSize(1);
+
+ // result = trunc(src);
+ // if (src < 0.0 && src != result)
+ // result += -1.0.
+
+ auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
+ auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
+
+ auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
+ SrcReg, Zero, Flags);
+ auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
+ SrcReg, Trunc, Flags);
+ auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
+ auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
+
+ MIRBuilder.buildFAdd(DstReg, Trunc, AddVal);
+ MI.eraseFromParent();
+ return Legalized;
+}
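
Rendered as scalar C++ (a sketch of the dataflow only), the sequence above reads as follows; note the s1 conjunction feeds G_SITOFP, which maps true to -1.0 per the s1 case added to lowerSITOFP earlier in this patch:

    #include <cmath>
    static double intrinsicRoundExpansion(double Src) {
      double T = std::trunc(Src);
      // SITOFP of the s1 "and": -1.0 when both compares hold, else 0.0.
      double AddVal = (Src < 0.0 && Src != T) ? -1.0 : 0.0;
      return T + AddVal;
    }
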
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
const unsigned NumDst = MI.getNumOperands() - 1;
const Register SrcReg = MI.getOperand(NumDst).getReg();
@@ -4083,10 +4202,7 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
LLT DstTy = MRI.getType(DstReg);
LLT IdxTy = LLT::scalar(32);
- const Constant *ShufMask = MI.getOperand(3).getShuffleMask();
-
- SmallVector<int, 32> Mask;
- ShuffleVectorInst::getShuffleMask(ShufMask, Mask);
+ ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
if (DstTy.isScalar()) {
if (Src0Ty.isVector())
@@ -4151,7 +4267,7 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
// Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
// have to generate an extra instruction to negate the alloc and then use
- // G_GEP to add the negative offset.
+ // G_PTR_ADD to add the negative offset.
auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
if (Align) {
APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
@@ -4275,3 +4391,99 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBswap(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ const LLT Ty = MRI.getType(Src);
+ unsigned SizeInBytes = Ty.getSizeInBytes();
+ unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
+
+ // Swap most and least significant byte, set remaining bytes in Res to zero.
+ auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
+ auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
+ auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
+ auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
+
+ // Set i-th high/low byte in Res to i-th low/high byte from Src.
+ for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
+ // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
+    APInt APMask(SizeInBytes * 8, 0xFFull << (i * 8));
+ auto Mask = MIRBuilder.buildConstant(Ty, APMask);
+ auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
+ // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
+ auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
+ auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
+ Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
+ // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
+ auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
+ auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
+ Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
+ }
+ Res.getInstr()->getOperand(0).setReg(Dst);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
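
A spot check of the shift/mask recipe for a 4-byte value, where the loop above runs only for i == 1 (plain C++):

    #include <cstdint>
    static constexpr uint32_t bswapExpansion(uint32_t Src) {
      uint32_t Res = (Src << 24) | (Src >> 24); // outermost bytes, rest zero
      uint32_t Mask = 0xFFu << 8;               // selects byte 1
      Res |= (Src & Mask) << 8;                 // low-middle -> high-middle
      Res |= (Src >> 8) & Mask;                 // high-middle -> low-middle
      return Res;
    }
    static_assert(bswapExpansion(0x11223344u) == 0x44332211u, "");
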
+
+// { (Src & Mask) >> N } | { (Src << N) & Mask }
+static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
+ MachineInstrBuilder Src, APInt Mask) {
+ const LLT Ty = Dst.getLLTTy(*B.getMRI());
+ MachineInstrBuilder C_N = B.buildConstant(Ty, N);
+ MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
+ auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
+ auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
+ return B.buildOr(Dst, LHS, RHS);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ const LLT Ty = MRI.getType(Src);
+ unsigned Size = Ty.getSizeInBits();
+
+ MachineInstrBuilder BSWAP =
+ MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
+
+ // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
+ // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
+ // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
+ MachineInstrBuilder Swap4 =
+ SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
+
+ // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
+  // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
+  // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
+ MachineInstrBuilder Swap2 =
+ SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
+
+ // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
+  // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
+  // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
+ SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
+
+ MI.eraseFromParent();
+ return Legalized;
+}
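
And a spot check of the swap ladder on a single byte, where the leading G_BSWAP is a no-op and the three SwapN steps do all the work (plain C++):

    #include <cstdint>
    static constexpr uint8_t swapN(uint8_t V, unsigned N, uint8_t Mask) {
      return static_cast<uint8_t>(((V & Mask) >> N) | ((V << N) & Mask));
    }
    static constexpr uint8_t bitreverse8(uint8_t V) {
      V = swapN(V, 4, 0xF0); // 7654|3210 -> 3210|7654
      V = swapN(V, 2, 0xCC); // 32|10 76|54 -> 10|32 54|76
      V = swapN(V, 1, 0xAA); // 1|0 pairs  -> 0|1 pairs
      return V;
    }
    static_assert(bitreverse8(0x86) == 0x61, ""); // 1000'0110 -> 0110'0001
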
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerReadRegister(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ const LLT Ty = MRI.getType(Dst);
+ const MDString *RegStr = cast<MDString>(
+ cast<MDNode>(MI.getOperand(1).getMetadata())->getOperand(0));
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetLowering *TLI = STI.getTargetLowering();
+ Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
+ if (!Reg.isValid())
+ return UnableToLegalize;
+
+ MIRBuilder.buildCopy(Dst, Reg);
+ MI.eraseFromParent();
+ return Legalized;
+}
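
For reference, the IR that funnels into this lowering looks roughly like the following (register name and width illustrative); getRegisterByName yields an invalid Register for names the target does not expose, hence the UnableToLegalize bail-out:

    // %v = call i64 @llvm.read_register.i64(metadata !0)
    // !0 = !{!"sp"}
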
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 70045512fae5..02f6b39e0905 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -128,24 +128,26 @@ static bool mutationIsSane(const LegalizeRule &Rule,
switch (Rule.getAction()) {
case FewerElements:
- case MoreElements: {
if (!OldTy.isVector())
return false;
-
+ LLVM_FALLTHROUGH;
+ case MoreElements: {
+ // MoreElements can go from scalar to vector.
+ const unsigned OldElts = OldTy.isVector() ? OldTy.getNumElements() : 1;
if (NewTy.isVector()) {
if (Rule.getAction() == FewerElements) {
// Make sure the element count really decreased.
- if (NewTy.getNumElements() >= OldTy.getNumElements())
+ if (NewTy.getNumElements() >= OldElts)
return false;
} else {
// Make sure the element count really increased.
- if (NewTy.getNumElements() <= OldTy.getNumElements())
+ if (NewTy.getNumElements() <= OldElts)
return false;
}
}
// Make sure the element type didn't change.
- return NewTy.getScalarType() == OldTy.getElementType();
+ return NewTy.getScalarType() == OldTy.getScalarType();
}
case NarrowScalar:
case WidenScalar: {
@@ -685,6 +687,10 @@ bool LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
return true;
}
+unsigned LegalizerInfo::getExtOpcodeForWideningConstant(LLT SmallTy) const {
+ return SmallTy.isByteSized() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+}
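
A target can override this to match what its selection patterns expect; a hypothetical override (class name assumed) that always any-extends, which the assert in widenScalar also permits:

    unsigned MyTargetLegalizerInfo::getExtOpcodeForWideningConstant(
        LLT SmallTy) const {
      return TargetOpcode::G_ANYEXT;
    }
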
+
/// \pre Type indices of every opcode form a dense set starting from 0.
void LegalizerInfo::verify(const MCInstrInfo &MII) const {
#ifndef NDEBUG
diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index f882ecbf5db3..1c4a668e5f31 100644
--- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -10,9 +10,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Localizer.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "localizer"
@@ -28,7 +29,11 @@ INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
"Move/duplicate certain instructions close to their use",
false, false)
-Localizer::Localizer() : MachineFunctionPass(ID) { }
+Localizer::Localizer(std::function<bool(const MachineFunction &)> F)
+ : MachineFunctionPass(ID), DoNotRunPass(F) {}
+
+Localizer::Localizer()
+ : Localizer([](const MachineFunction &) { return false; }) {}
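
This gives a target's pass pipeline a per-function veto over localization. A hypothetical policy, with the predicate body purely illustrative:

    addPass(new Localizer([](const MachineFunction &MF) {
      // Say we want to skip the pass for minsize functions.
      return MF.getFunction().hasMinSize();
    }));
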
void Localizer::init(MachineFunction &MF) {
MRI = &MF.getRegInfo();
@@ -211,6 +216,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
MachineFunctionProperties::Property::FailedISel))
return false;
+  // Don't run the pass if the target asked us to skip it.
+ if (DoNotRunPass(MF))
+ return false;
+
LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n');
init(MF);
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index df770f6664ca..67d9dacda61b 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -219,19 +219,19 @@ void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0,
assert((Res == Op0) && "type mismatch");
}
-MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res,
- const SrcOp &Op0,
- const SrcOp &Op1) {
+MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
assert(Res.getLLTTy(*getMRI()).isPointer() &&
Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type");
- return buildInstr(TargetOpcode::G_GEP, {Res}, {Op0, Op1});
+ return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1});
}
Optional<MachineInstrBuilder>
-MachineIRBuilder::materializeGEP(Register &Res, Register Op0,
- const LLT &ValueTy, uint64_t Value) {
+MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0,
+ const LLT &ValueTy, uint64_t Value) {
assert(Res == 0 && "Res is a result argument");
assert(ValueTy.isScalar() && "invalid offset type");
@@ -242,7 +242,7 @@ MachineIRBuilder::materializeGEP(Register &Res, Register Op0,
Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0));
auto Cst = buildConstant(ValueTy, Value);
- return buildGEP(Res, Op0, Cst.getReg(0));
+ return buildPtrAdd(Res, Op0, Cst.getReg(0));
}
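
Callers bind the result register by reference, as reduceLoadStoreWidth does above. A minimal use (values illustrative); for a zero offset the helper is expected to alias Res to Op0 and return None without emitting anything:

    Register NewAddr; // must be a null Register on entry (asserted above)
    MIRBuilder.materializePtrAdd(NewAddr, BaseAddr, LLT::scalar(64), Offset);
    // NewAddr names BaseAddr + Offset (or BaseAddr itself if Offset == 0).
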
MachineInstrBuilder MachineIRBuilder::buildPtrMask(const DstOp &Res,
@@ -698,8 +698,9 @@ MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res,
}
MachineInstrBuilder MachineIRBuilder::buildFPTrunc(const DstOp &Res,
- const SrcOp &Op) {
- return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op);
+ const SrcOp &Op,
+ Optional<unsigned> Flags) {
+ return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op, Flags);
}
MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index f0e35c65c53b..98e48f5fc1d5 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -32,6 +32,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
@@ -118,16 +119,16 @@ bool RegBankSelect::assignmentMatch(
return false;
const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI);
- const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank;
+ const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank;
// Reg has no assignment yet; a simple assignment will make the
// register bank match.
OnlyAssign = CurRegBank == nullptr;
LLVM_DEBUG(dbgs() << "Does assignment already match: ";
if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none";
dbgs() << " against ";
- assert(DesiredRegBrank && "The mapping must be valid");
- dbgs() << *DesiredRegBrank << '\n';);
- return CurRegBank == DesiredRegBrank;
+ assert(DesiredRegBank && "The mapping must be valid");
+ dbgs() << *DesiredRegBank << '\n';);
+ return CurRegBank == DesiredRegBank;
}
bool RegBankSelect::repairReg(
@@ -259,11 +260,11 @@ uint64_t RegBankSelect::getRepairCost(
return RBI->getBreakDownCost(ValMapping, CurRegBank);
if (IsSameNumOfValues) {
- const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank;
+ const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank;
// If we repair a definition, swap the source and destination for
// the repairing.
if (MO.isDef())
- std::swap(CurRegBank, DesiredRegBrank);
+ std::swap(CurRegBank, DesiredRegBank);
// TODO: It may be possible to actually avoid the copy.
// If we repair something where the source is defined by a copy
// and the source of that copy is on the right bank, we can reuse
@@ -275,7 +276,7 @@ uint64_t RegBankSelect::getRepairCost(
// into a new virtual register.
// We would also need to propagate this information in the
// repairing placement.
- unsigned Cost = RBI->copyCost(*DesiredRegBrank, *CurRegBank,
+ unsigned Cost = RBI->copyCost(*DesiredRegBank, *CurRegBank,
RBI->getSizeInBits(MO.getReg(), *MRI, *TRI));
// TODO: use a dedicated constant for ImpossibleCost.
if (Cost != std::numeric_limits<unsigned>::max())
diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 3fcc55286beb..255ea693b5c4 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -82,15 +82,18 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
const RegisterBank *
RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
- if (Register::isPhysicalRegister(Reg))
- return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI));
+ if (Register::isPhysicalRegister(Reg)) {
+ // FIXME: This was probably a copy to a virtual register that does have a
+ // type we could use.
+ return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT());
+ }
assert(Reg && "NoRegister does not have a register bank");
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>())
return RB;
if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
- return &getRegBankFromRegClass(*RC);
+ return &getRegBankFromRegClass(*RC, MRI.getType(Reg));
return nullptr;
}
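
Both call sites now pass an LLT, so the target hook has evidently grown a type parameter; in sketch form, the assumed new signature is:

    virtual const RegisterBank &
    getRegBankFromRegClass(const TargetRegisterClass &RC, LLT Ty) const;

The extra type lets targets disambiguate register classes that can map to more than one bank, e.g. by element type or size.
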
@@ -108,15 +111,18 @@ RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI) const {
+ const MachineRegisterInfo &MRI) const {
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+
// The mapping of the registers may be available via the
// register class constraints.
- const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, &TRI);
+ const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, TRI);
if (!RC)
return nullptr;
- const RegisterBank &RegBank = getRegBankFromRegClass(*RC);
+ Register Reg = MI.getOperand(OpIdx).getReg();
+ const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg));
// Sanity check that the target properly implemented getRegBankFromRegClass.
assert(RegBank.covers(*RC) &&
"The mapping of the register bank does not make sense");
@@ -195,7 +201,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
if (!CurRegBank) {
// If this is a target specific instruction, we can deduce
// the register bank from the encoding constraints.
- CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI);
+ CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, MRI);
if (!CurRegBank) {
// All our attempts failed, give up.
CompleteMapping = false;
@@ -444,7 +450,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
continue;
}
if (!MO.getReg()) {
- LLVM_DEBUG(dbgs() << " is %%noreg, nothing to be done\n");
+ LLVM_DEBUG(dbgs() << " is $noreg, nothing to be done\n");
continue;
}
assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns !=
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 45618d7992ad..eeec2a5d536a 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -431,20 +431,3 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1,
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
-
-MVT llvm::getMVTForLLT(LLT Ty) {
- if (!Ty.isVector())
- return MVT::getIntegerVT(Ty.getSizeInBits());
-
- return MVT::getVectorVT(
- MVT::getIntegerVT(Ty.getElementType().getSizeInBits()),
- Ty.getNumElements());
-}
-
-LLT llvm::getLLTForMVT(MVT Ty) {
- if (!Ty.isVector())
- return LLT::scalar(Ty.getSizeInBits());
-
- return LLT::vector(Ty.getVectorNumElements(),
- Ty.getVectorElementType().getSizeInBits());
-}
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index d4fa45fcb405..5870e20d4227 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -82,6 +82,7 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp
index 6a0f98d2e2b4..65c2a37e5d43 100644
--- a/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -15,25 +15,28 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
-#include "llvm/PassSupport.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
@@ -75,8 +78,44 @@ ForceGuardLoopEntry(
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+#ifndef NDEBUG
+static void debugHWLoopFailure(const StringRef DebugMsg,
+ Instruction *I) {
+ dbgs() << "HWLoops: " << DebugMsg;
+ if (I)
+ dbgs() << ' ' << *I;
+ else
+ dbgs() << '.';
+ dbgs() << '\n';
+}
+#endif
+
+static OptimizationRemarkAnalysis
+createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
+ Value *CodeRegion = L->getHeader();
+ DebugLoc DL = L->getStartLoc();
+
+ if (I) {
+ CodeRegion = I->getParent();
+    // If there is no debug location attached to the instruction, revert to
+    // using the loop's.
+ if (I->getDebugLoc())
+ DL = I->getDebugLoc();
+ }
+
+ OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
+ R << "hardware-loop not created: ";
+ return R;
+}
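
With analysis remarks enabled, a rejected loop now surfaces to the user; assuming the pass's remark name matches its DEBUG_TYPE, the output looks roughly like:

    // $ clang ... -Rpass-analysis=hardware-loops
    // remark: file.c:12:3: hardware-loop not created: it's not profitable
    //         to create a hardware-loop [-Rpass-analysis=hardware-loops]
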
+
namespace {
+ void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
+ OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
+ LLVM_DEBUG(debugHWLoopFailure(Msg, I));
+ ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
+ }
+
using TTI = TargetTransformInfo;
class HardwareLoops : public FunctionPass {
@@ -97,6 +136,7 @@ namespace {
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
// Try to convert the given Loop into a hardware loop.
@@ -110,6 +150,7 @@ namespace {
ScalarEvolution *SE = nullptr;
LoopInfo *LI = nullptr;
const DataLayout *DL = nullptr;
+ OptimizationRemarkEmitter *ORE = nullptr;
const TargetTransformInfo *TTI = nullptr;
DominatorTree *DT = nullptr;
bool PreserveLCSSA = false;
@@ -143,8 +184,9 @@ namespace {
public:
HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
- const DataLayout &DL) :
- SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()),
+ const DataLayout &DL,
+ OptimizationRemarkEmitter *ORE) :
+ SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
ExitCount(Info.ExitCount),
CountType(Info.CountType),
ExitBranch(Info.ExitBranch),
@@ -157,6 +199,7 @@ namespace {
private:
ScalarEvolution &SE;
const DataLayout &DL;
+ OptimizationRemarkEmitter *ORE = nullptr;
Loop *L = nullptr;
Module *M = nullptr;
const SCEV *ExitCount = nullptr;
@@ -182,6 +225,7 @@ bool HardwareLoops::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DL = &F.getParent()->getDataLayout();
+ ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
@@ -201,31 +245,39 @@ bool HardwareLoops::runOnFunction(Function &F) {
// converted and the parent loop doesn't support containing a hardware loop.
bool HardwareLoops::TryConvertLoop(Loop *L) {
// Process nested loops first.
- for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- if (TryConvertLoop(*I))
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ if (TryConvertLoop(*I)) {
+ reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
+ ORE, L);
return true; // Stop search.
+ }
+ }
HardwareLoopInfo HWLoopInfo(L);
- if (!HWLoopInfo.canAnalyze(*LI))
+ if (!HWLoopInfo.canAnalyze(*LI)) {
+ reportHWLoopFailure("cannot analyze loop, irreducible control flow",
+ "HWLoopCannotAnalyze", ORE, L);
return false;
+ }
- if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) ||
- ForceHardwareLoops) {
-
- // Allow overriding of the counter width and loop decrement value.
- if (CounterBitWidth.getNumOccurrences())
- HWLoopInfo.CountType =
- IntegerType::get(M->getContext(), CounterBitWidth);
+ if (!ForceHardwareLoops &&
+ !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
+ reportHWLoopFailure("it's not profitable to create a hardware-loop",
+ "HWLoopNotProfitable", ORE, L);
+ return false;
+ }
- if (LoopDecrement.getNumOccurrences())
- HWLoopInfo.LoopDecrement =
- ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
+ // Allow overriding of the counter width and loop decrement value.
+ if (CounterBitWidth.getNumOccurrences())
+ HWLoopInfo.CountType =
+ IntegerType::get(M->getContext(), CounterBitWidth);
- MadeChange |= TryConvertLoop(HWLoopInfo);
- return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
- }
+ if (LoopDecrement.getNumOccurrences())
+ HWLoopInfo.LoopDecrement =
+ ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
- return false;
+ MadeChange |= TryConvertLoop(HWLoopInfo);
+ return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
}
bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
@@ -234,8 +286,13 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
- ForceHardwareLoopPHI))
+ ForceHardwareLoopPHI)) {
+ // TODO: there can be many reasons a loop is not considered a
+ // candidate, so we should let isHardwareLoopCandidate fill in the
+ // reason and then report a better message here.
+ reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
return false;
+ }
assert(
(HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
@@ -249,7 +306,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
if (!Preheader)
return false;
- HardwareLoop HWLoop(HWLoopInfo, *SE, *DL);
+ HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
HWLoop.Create();
++NumHWLoops;
return true;
@@ -257,10 +314,13 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
void HardwareLoop::Create() {
LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
-
+
Value *LoopCountInit = InitLoopCount();
- if (!LoopCountInit)
+ if (!LoopCountInit) {
+ reportHWLoopFailure("could not safely create a loop count expression",
+ "HWLoopNotSafe", ORE, L);
return;
+ }
InsertIterationSetup(LoopCountInit);
@@ -458,6 +518,7 @@ INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
index d9caa5660695..7d64828aa482 100644
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -35,7 +36,9 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
@@ -211,6 +214,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -432,6 +436,7 @@ char &llvm::IfConverterID = IfConverter::ID;
INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
@@ -444,6 +449,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TRI = ST.getRegisterInfo();
BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ ProfileSummaryInfo *PSI =
+ &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
MRI = &MF.getRegInfo();
SchedModel.init(&ST);
@@ -454,7 +461,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
bool BFChange = false;
if (!PreRegAlloc) {
// Tail merging tends to expose more if-conversion opportunities.
- BranchFolder BF(true, false, MBFI, *MBPI);
+ BranchFolder BF(true, false, MBFI, *MBPI, PSI);
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
BFChange = BF.OptimizeFunction(
MF, TII, ST.getRegisterInfo(),
@@ -596,7 +603,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
BBAnalysis.clear();
if (MadeChange && IfCvtBranchFold) {
- BranchFolder BF(false, false, MBFI, *MBPI);
+ BranchFolder BF(false, false, MBFI, *MBPI, PSI);
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
BF.OptimizeFunction(
MF, TII, MF.getSubtarget().getRegisterInfo(),
diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index b7dcaec90106..0bbedb0a5ea6 100644
--- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -50,6 +50,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -371,7 +372,7 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
// We want the mem access to be issued at a sane offset from PointerReg,
// so that if PointerReg is null then the access reliably page faults.
- if (!((MI.mayLoad() || MI.mayStore()) && !MI.isPredicable() &&
+ if (!(MI.mayLoadOrStore() && !MI.isPredicable() &&
-PageSize < Offset && Offset < PageSize))
return SR_Unsuitable;
@@ -697,7 +698,7 @@ void ImplicitNullChecks::rewriteNullChecks(
if (auto *DepMI = NC.getOnlyDependency()) {
for (auto &MO : DepMI->operands()) {
- if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef() || MO.isDead())
continue;
if (!NC.getNotNullSucc()->isLiveIn(MO.getReg()))
NC.getNotNullSucc()->addLiveIn(MO.getReg());
diff --git a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
index 7ac093ba4a71..4473a139d3ad 100644
--- a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 2408f18678e4..ed3e159ac566 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -534,7 +534,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg,
// may have more remats than physregs, we're guaranteed to fail to assign
// one.
// At the moment, we only handle this for STATEPOINTs since they're the only
- // psuedo op where we've seen this. If we start seeing other instructions
+ // pseudo op where we've seen this. If we start seeing other instructions
// with the same problem, we need to revisit this.
return (MI.getOpcode() != TargetOpcode::STATEPOINT);
}
@@ -543,8 +543,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg,
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Analyze instruction
SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
- MIBundleOperands::VirtRegInfo RI =
- MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+ VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg, &Ops);
if (!RI.Reads)
return false;
@@ -782,7 +781,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
/// foldMemoryOperand - Try folding stack slot references in Ops into their
/// instructions.
///
-/// @param Ops Operand indices from analyzeVirtReg().
+/// @param Ops Operand indices from AnalyzeVirtRegInBundle().
/// @param LoadMI Load instruction to use instead of stack slot when non-null.
/// @return True on success.
bool InlineSpiller::
@@ -851,8 +850,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
// Skip non-Defs, including undef uses and internal reads.
if (MO->isUse())
continue;
- MIBundleOperands::PhysRegInfo RI =
- MIBundleOperands(*FoldMI).analyzePhysReg(Reg, &TRI);
+ PhysRegInfo RI = AnalyzePhysRegInBundle(*FoldMI, Reg, &TRI);
if (RI.FullyDefined)
continue;
// FoldMI does not define this physreg. Remove the LI segment.
@@ -992,8 +990,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
// Analyze instruction.
SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
- MIBundleOperands::VirtRegInfo RI =
- MIBundleOperands(*MI).analyzeVirtReg(Reg, &Ops);
+ VirtRegInfo RI = AnalyzeVirtRegInBundle(*MI, Reg, &Ops);
// Find the slot index where this instruction reads and writes OldLI.
// This is usually the def slot, except for tied early clobbers.
@@ -1430,7 +1427,7 @@ void HoistSpillHelper::runHoistSpills(
}
// For spills in SpillsToKeep with LiveReg set (i.e., not original spill),
// save them to SpillsToIns.
- for (const auto Ent : SpillsToKeep) {
+ for (const auto &Ent : SpillsToKeep) {
if (Ent.second)
SpillsToIns[Ent.first->getBlock()] = Ent.second;
}
@@ -1489,7 +1486,7 @@ void HoistSpillHelper::hoistAllSpills() {
LLVM_DEBUG({
dbgs() << "Finally inserted spills in BB: ";
- for (const auto Ispill : SpillsToIns)
+ for (const auto &Ispill : SpillsToIns)
dbgs() << Ispill.first->getNumber() << " ";
dbgs() << "\nFinally removed spills in BB: ";
for (const auto Rspill : SpillsToRm)
@@ -1504,7 +1501,7 @@ void HoistSpillHelper::hoistAllSpills() {
StackIntvl.getValNumInfo(0));
// Insert hoisted spills.
- for (auto const Insert : SpillsToIns) {
+ for (auto const &Insert : SpillsToIns) {
MachineBasicBlock *BB = Insert.first;
unsigned LiveReg = Insert.second;
MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB);
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 14bc560a561c..1f9b436378d2 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -58,6 +58,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 770c4952d169..42691b8a6154 100644
--- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -34,6 +34,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -1167,7 +1168,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
// If there are users outside the set to be eliminated, we abort the
// transformation. No gain can be expected.
- for (const auto &U : I->users()) {
+ for (auto *U : I->users()) {
if (Is.find(dyn_cast<Instruction>(U)) == Is.end())
return false;
}
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 8cbd8bcaeabb..4461a235d6c1 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -50,14 +50,6 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
return NewCI;
}
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
- !defined(setjmp_undefined_for_msvc)
-# pragma push_macro("setjmp")
-# undef setjmp
-# define setjmp_undefined_for_msvc
-#endif
-
/// Emit the code to lower bswap of V before the specified instruction IP.
static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!");
@@ -254,34 +246,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
- // The setjmp/longjmp intrinsics should only exist in the code if it was
- // never optimized (ie, right out of the CFE), or if it has been hacked on
- // by the lowerinvoke pass. In both cases, the right thing to do is to
- // convert the call to an explicit setjmp or longjmp call.
- case Intrinsic::setjmp: {
- Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(),
- Type::getInt32Ty(Context));
- if (!CI->getType()->isVoidTy())
- CI->replaceAllUsesWith(V);
- break;
- }
- case Intrinsic::sigsetjmp:
- if (!CI->getType()->isVoidTy())
- CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
- break;
-
- case Intrinsic::longjmp: {
- ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(),
- Type::getVoidTy(Context));
- break;
- }
-
- case Intrinsic::siglongjmp: {
- // Insert the call to abort
- ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
- Type::getVoidTy(Context));
- break;
- }
case Intrinsic::ctpop:
CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
break;
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 1c362aec6e67..50c178ff7598 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -48,8 +48,8 @@ void LLVMTargetMachine::initAsmInfo() {
STI.reset(TheTarget.createMCSubtargetInfo(
getTargetTriple().str(), getTargetCPU(), getTargetFeatureString()));
- MCAsmInfo *TmpAsmInfo =
- TheTarget.createMCAsmInfo(*MRI, getTargetTriple().str());
+ MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(
+ *MRI, getTargetTriple().str(), Options.MCOptions);
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
// and if the old one gets included then MCAsmInfo will be NULL and
// we'll crash later.
diff --git a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
index cef5085ae079..63a0d0c1c43e 100644
--- a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
+++ b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -14,6 +14,7 @@
///===---------------------------------------------------------------------===//
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp
index f1b237d83e8c..2226c10b49a4 100644
--- a/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -57,6 +57,7 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -89,8 +90,28 @@ static Register isDbgValueDescribedByReg(const MachineInstr &MI) {
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register();
}
+/// Return true if \p Op is a register other than the stack pointer or the
+/// frame register, otherwise return false. This is used to avoid basing the
+/// debug entry values on the stack/frame registers, which is not supported
+/// at the moment.
+static bool isRegOtherThanSPAndFP(const MachineOperand &Op,
+ const MachineInstr &MI,
+ const TargetRegisterInfo *TRI) {
+ if (!Op.isReg())
+ return false;
+
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register FP = TRI->getFrameRegister(*MF);
+ Register Reg = Op.getReg();
+
+ return Reg && Reg != SP && Reg != FP;
+}
+
namespace {
+using DefinedRegsSet = SmallSet<Register, 32>;
+
class LiveDebugValues : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
@@ -123,60 +144,6 @@ private:
using FragmentInfo = DIExpression::FragmentInfo;
using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
- /// Storage for identifying a potentially inlined instance of a variable,
- /// or a fragment thereof.
- class DebugVariable {
- const DILocalVariable *Variable;
- OptFragmentInfo Fragment;
- const DILocation *InlinedAt;
-
- /// Fragment that will overlap all other fragments. Used as default when
- /// caller demands a fragment.
- static const FragmentInfo DefaultFragment;
-
- public:
- DebugVariable(const DILocalVariable *Var, OptFragmentInfo &&FragmentInfo,
- const DILocation *InlinedAt)
- : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {}
-
- DebugVariable(const DILocalVariable *Var, OptFragmentInfo &FragmentInfo,
- const DILocation *InlinedAt)
- : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {}
-
- DebugVariable(const DILocalVariable *Var, const DIExpression *DIExpr,
- const DILocation *InlinedAt)
- : DebugVariable(Var, DIExpr->getFragmentInfo(), InlinedAt) {}
-
- DebugVariable(const MachineInstr &MI)
- : DebugVariable(MI.getDebugVariable(),
- MI.getDebugExpression()->getFragmentInfo(),
- MI.getDebugLoc()->getInlinedAt()) {}
-
- const DILocalVariable *getVar() const { return Variable; }
- const OptFragmentInfo &getFragment() const { return Fragment; }
- const DILocation *getInlinedAt() const { return InlinedAt; }
-
- const FragmentInfo getFragmentDefault() const {
- return Fragment.getValueOr(DefaultFragment);
- }
-
- static bool isFragmentDefault(FragmentInfo &F) {
- return F == DefaultFragment;
- }
-
- bool operator==(const DebugVariable &Other) const {
- return std::tie(Variable, Fragment, InlinedAt) ==
- std::tie(Other.Variable, Other.Fragment, Other.InlinedAt);
- }
-
- bool operator<(const DebugVariable &Other) const {
- return std::tie(Variable, Fragment, InlinedAt) <
- std::tie(Other.Variable, Other.Fragment, Other.InlinedAt);
- }
- };
-
- friend struct llvm::DenseMapInfo<DebugVariable>;
-
/// A pair of debug variable and value location.
struct VarLoc {
// The location at which a spilled variable resides. It consists of a
@@ -205,7 +172,9 @@ private:
RegisterKind,
SpillLocKind,
ImmediateKind,
- EntryValueKind
+ EntryValueKind,
+ EntryValueBackupKind,
+ EntryValueCopyBackupKind
} Kind = InvalidKind;
/// The value location. Stored separately to avoid repeatedly
@@ -220,14 +189,15 @@ private:
} Loc;
VarLoc(const MachineInstr &MI, LexicalScopes &LS)
- : Var(MI), Expr(MI.getDebugExpression()), MI(MI),
- UVS(MI.getDebugLoc(), LS) {
+ : Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt()),
+ Expr(MI.getDebugExpression()), MI(MI), UVS(MI.getDebugLoc(), LS) {
static_assert((sizeof(Loc) == sizeof(uint64_t)),
"hash does not cover all members of Loc");
assert(MI.isDebugValue() && "not a DBG_VALUE");
assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
if (int RegNo = isDbgValueDescribedByReg(MI)) {
- Kind = MI.isDebugEntryValue() ? EntryValueKind : RegisterKind;
+ Kind = RegisterKind;
Loc.RegNo = RegNo;
} else if (MI.getOperand(0).isImm()) {
Kind = ImmediateKind;
@@ -239,17 +209,50 @@ private:
Kind = ImmediateKind;
Loc.CImm = MI.getOperand(0).getCImm();
}
- assert((Kind != ImmediateKind || !MI.isDebugEntryValue()) &&
- "entry values must be register locations");
+
+ // Debug entry values are created by the factory functions below rather
+ // than by this constructor.
+ assert(Kind != EntryValueKind && !isEntryBackupLoc());
}
/// Take the variable and machine-location in DBG_VALUE MI, and build an
/// entry location using the given expression.
static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS,
- const DIExpression *EntryExpr) {
+ const DIExpression *EntryExpr, unsigned Reg) {
VarLoc VL(MI, LS);
+ assert(VL.Kind == RegisterKind);
VL.Kind = EntryValueKind;
VL.Expr = EntryExpr;
+ VL.Loc.RegNo = Reg;
+ return VL;
+ }
+
+ /// Take the variable and machine-location from the DBG_VALUE (from the
+ /// function entry), and build an entry value backup location. The backup
+ /// location is promoted to the primary location if it is still valid when
+ /// the primary location is clobbered.
+ static VarLoc CreateEntryBackupLoc(const MachineInstr &MI,
+ LexicalScopes &LS,
+ const DIExpression *EntryExpr) {
+ VarLoc VL(MI, LS);
+ assert(VL.Kind == RegisterKind);
+ VL.Kind = EntryValueBackupKind;
+ VL.Expr = EntryExpr;
+ return VL;
+ }
+
+ /// Take the variable and machine-location from the DBG_VALUE (from the
+ /// function entry), and build a copy of an entry value backup location by
+ /// setting the register location to NewReg.
+ static VarLoc CreateEntryCopyBackupLoc(const MachineInstr &MI,
+ LexicalScopes &LS,
+ const DIExpression *EntryExpr,
+ unsigned NewReg) {
+ VarLoc VL(MI, LS);
+ assert(VL.Kind == RegisterKind);
+ VL.Kind = EntryValueCopyBackupKind;
+ VL.Expr = EntryExpr;
+ VL.Loc.RegNo = NewReg;
return VL;
}
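
A minimal standalone sketch of the lifecycle these factory functions encode:
record a backup at the parameter's entry DBG_VALUE, switch to a copy backup
when the unmodified value moves to another register, and promote the backup to
a real entry-value location once the primary location dies. The Kind names
mirror the enum above; the register numbers and the promotion trigger are
illustrative, not the real LLVM types:

    #include <cassert>
    #include <cstdio>

    enum Kind { RegisterKind, EntryValueKind, EntryValueBackupKind,
                EntryValueCopyBackupKind };

    struct Loc {
      Kind K;
      unsigned RegNo;
    };

    Loc createEntryBackup(unsigned Reg) {
      return {EntryValueBackupKind, Reg};
    }

    Loc createEntryCopyBackup(unsigned NewReg) {
      return {EntryValueCopyBackupKind, NewReg};
    }

    Loc promoteToEntryValue(const Loc &Backup) {
      assert(Backup.K == EntryValueBackupKind ||
             Backup.K == EntryValueCopyBackupKind);
      return {EntryValueKind, Backup.RegNo};
    }

    int main() {
      Loc Backup = createEntryBackup(5);       // parameter arrives in register 5
      Backup = createEntryCopyBackup(7);       // value moved, unmodified, to register 7
      Loc Entry = promoteToEntryValue(Backup); // primary location clobbered
      std::printf("kind=%d reg=%u\n", static_cast<int>(Entry.K), Entry.RegNo);
    }
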
@@ -288,8 +291,11 @@ private:
switch (Kind) {
case EntryValueKind:
// An entry value is a register location -- but with an updated
- // expression.
- return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, Expr);
+ // expression. The register location of such a DBG_VALUE is always the one
+ // from the entry DBG_VALUE; it does not matter whether the entry value was
+ // copied into another register by some optimization.
+ return BuildMI(MF, DbgLoc, IID, Indirect, MI.getOperand(0).getReg(),
+ Var, Expr);
case RegisterKind:
// Register locations are like the source DBG_VALUE, but with the
// register number from this VarLoc.
@@ -308,8 +314,11 @@ private:
MachineOperand MO = MI.getOperand(0);
return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr);
}
+ case EntryValueBackupKind:
+ case EntryValueCopyBackupKind:
case InvalidKind:
- llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc");
+ llvm_unreachable(
+ "Tried to produce DBG_VALUE for invalid or backup VarLoc");
}
llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum");
}
@@ -317,6 +326,27 @@ private:
/// Is the Loc field a constant or constant object?
bool isConstant() const { return Kind == ImmediateKind; }
+ /// Check if the Loc field is an entry backup location.
+ bool isEntryBackupLoc() const {
+ return Kind == EntryValueBackupKind || Kind == EntryValueCopyBackupKind;
+ }
+
+ /// If this variable is described by a register holding the entry value,
+ /// return it, otherwise return 0.
+ unsigned getEntryValueBackupReg() const {
+ if (Kind == EntryValueBackupKind)
+ return Loc.RegNo;
+ return 0;
+ }
+
+ /// If this variable is described by a register holding the copy of the
+ /// entry value, return it, otherwise return 0.
+ unsigned getEntryValueCopyBackupReg() const {
+ if (Kind == EntryValueCopyBackupKind)
+ return Loc.RegNo;
+ return 0;
+ }
+
/// If this variable is described by a register, return it,
/// otherwise return 0.
unsigned isDescribedByReg() const {
@@ -336,6 +366,8 @@ private:
switch (Kind) {
case RegisterKind:
case EntryValueKind:
+ case EntryValueBackupKind:
+ case EntryValueCopyBackupKind:
dbgs() << printReg(Loc.RegNo, TRI);
break;
case SpillLocKind:
@@ -349,11 +381,17 @@ private:
llvm_unreachable("Invalid VarLoc in dump method");
}
- dbgs() << ", \"" << Var.getVar()->getName() << "\", " << *Expr << ", ";
+ dbgs() << ", \"" << Var.getVariable()->getName() << "\", " << *Expr
+ << ", ";
if (Var.getInlinedAt())
dbgs() << "!" << Var.getInlinedAt()->getMetadataID() << ")\n";
else
- dbgs() << "(null))\n";
+ dbgs() << "(null))";
+
+ if (isEntryBackupLoc())
+ dbgs() << " (backup loc)\n";
+ else
+ dbgs() << "\n";
}
#endif
@@ -369,7 +407,6 @@ private:
}
};
- using DebugParamMap = SmallDenseMap<const DILocalVariable *, MachineInstr *>;
using VarLocMap = UniqueVector<VarLoc>;
using VarLocSet = SparseBitVector<>;
using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>;
@@ -395,10 +432,18 @@ private:
/// This holds the working set of currently open ranges. For fast
/// access, this is done both as a set of VarLocIDs, and a map of
/// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all
- /// previous open ranges for the same variable.
+ /// previous open ranges for the same variable. In addition, we keep
+ /// two different maps (Vars/EntryValuesBackupVars), so the erase/insert
+ /// methods act differently depending on whether a VarLoc is a primary
+ /// or a backup location: for a backup location we erase/insert from the
+ /// EntryValuesBackupVars map, otherwise we perform the operation on Vars.
class OpenRangesSet {
VarLocSet VarLocs;
+ // Map the DebugVariable to recent primary location ID.
SmallDenseMap<DebugVariable, unsigned, 8> Vars;
+ // Map the DebugVariable to recent backup location ID.
+ SmallDenseMap<DebugVariable, unsigned, 8> EntryValuesBackupVars;
OverlapMap &OverlappingFragments;
public:
@@ -406,40 +451,38 @@ private:
const VarLocSet &getVarLocs() const { return VarLocs; }
- /// Terminate all open ranges for Var by removing it from the set.
- void erase(DebugVariable Var);
+ /// Terminate all open ranges for VL.Var by removing it from the set.
+ void erase(const VarLoc &VL);
/// Terminate all open ranges listed in \c KillSet by removing
/// them from the set.
- void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs) {
- VarLocs.intersectWithComplement(KillSet);
- for (unsigned ID : KillSet)
- Vars.erase(VarLocIDs[ID].Var);
- }
+ void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs);
/// Insert a new range into the set.
- void insert(unsigned VarLocID, DebugVariable Var) {
- VarLocs.set(VarLocID);
- Vars.insert({Var, VarLocID});
- }
+ void insert(unsigned VarLocID, const VarLoc &VL);
/// Insert a set of ranges.
void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) {
for (unsigned Id : ToLoad) {
- const VarLoc &Var = Map[Id];
- insert(Id, Var.Var);
+ const VarLoc &VarL = Map[Id];
+ insert(Id, VarL);
}
}
+ llvm::Optional<unsigned> getEntryValueBackup(DebugVariable Var);
+
/// Empty the set.
void clear() {
VarLocs.clear();
Vars.clear();
+ EntryValuesBackupVars.clear();
}
/// Return whether the set is empty or not.
bool empty() const {
- assert(Vars.empty() == VarLocs.empty() && "open ranges are inconsistent");
+ assert(Vars.empty() == EntryValuesBackupVars.empty() &&
+ Vars.empty() == VarLocs.empty() &&
+ "open ranges are inconsistent");
return VarLocs.empty();
}
};
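
A reduced standalone sketch of the two-map dispatch described in the comment
above the class, with std::map and std::string standing in for the LLVM
containers and for DebugVariable:

    #include <map>
    #include <string>

    struct VarLocLite {
      std::string Var;
      bool IsEntryBackup; // plays the role of isEntryBackupLoc()
    };

    class OpenRangesLite {
      std::map<std::string, unsigned> Vars;            // primary locations
      std::map<std::string, unsigned> EntryBackupVars; // backup locations

    public:
      void insert(unsigned ID, const VarLocLite &VL) {
        auto &M = VL.IsEntryBackup ? EntryBackupVars : Vars;
        M.insert({VL.Var, ID});
      }
      void erase(const VarLocLite &VL) {
        auto &M = VL.IsEntryBackup ? EntryBackupVars : Vars;
        M.erase(VL.Var);
      }
    };
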
@@ -456,6 +499,14 @@ private:
bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
unsigned &Reg);
+ /// Returns true if the given machine instruction is a debug value which we
+ /// can emit entry values for.
+ ///
+ /// Currently, we generate debug entry values only for parameters that are
+ /// unmodified throughout the function and located in a register.
+ bool isEntryValueCandidate(const MachineInstr &MI,
+ const DefinedRegsSet &Regs) const;
+
/// If a given instruction is identified as a spill, return the spill location
/// and set \p Reg to the spilled register.
Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI,
@@ -473,23 +524,23 @@ private:
VarLocMap &VarLocIDs);
void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers);
+ bool removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, const VarLoc &EntryVL);
void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers,
- DebugParamMap &DebugEntryVals,
SparseBitVector<> &KillSet);
+ void recordEntryValue(const MachineInstr &MI,
+ const DefinedRegsSet &DefinedRegs,
+ OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs);
void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers);
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers,
- DebugParamMap &DebugEntryVals);
+ VarLocMap &VarLocIDs, TransferMap &Transfers);
bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
void process(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
- TransferMap &Transfers, DebugParamMap &DebugEntryVals,
- OverlapMap &OverlapFragments,
- VarToFragments &SeenFragments);
+ VarLocMap &VarLocIDs, TransferMap &Transfers);
void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments,
OverlapMap &OLapMap);
@@ -532,46 +583,10 @@ public:
} // end anonymous namespace
-namespace llvm {
-
-template <> struct DenseMapInfo<LiveDebugValues::DebugVariable> {
- using DV = LiveDebugValues::DebugVariable;
- using OptFragmentInfo = LiveDebugValues::OptFragmentInfo;
- using FragmentInfo = LiveDebugValues::FragmentInfo;
-
- // Empty key: no key should be generated that has no DILocalVariable.
- static inline DV getEmptyKey() {
- return DV(nullptr, OptFragmentInfo(), nullptr);
- }
-
- // Difference in tombstone is that the Optional is meaningful
- static inline DV getTombstoneKey() {
- return DV(nullptr, OptFragmentInfo({0, 0}), nullptr);
- }
-
- static unsigned getHashValue(const DV &D) {
- unsigned HV = 0;
- const OptFragmentInfo &Fragment = D.getFragment();
- if (Fragment)
- HV = DenseMapInfo<FragmentInfo>::getHashValue(*Fragment);
-
- return hash_combine(D.getVar(), HV, D.getInlinedAt());
- }
-
- static bool isEqual(const DV &A, const DV &B) { return A == B; }
-};
-
-} // namespace llvm
-
//===----------------------------------------------------------------------===//
// Implementation
//===----------------------------------------------------------------------===//
-const DIExpression::FragmentInfo
- LiveDebugValues::DebugVariable::DefaultFragment = {
- std::numeric_limits<uint64_t>::max(),
- std::numeric_limits<uint64_t>::min()};
-
char LiveDebugValues::ID = 0;
char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
@@ -592,38 +607,72 @@ void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
}
/// Erase a variable from the set of open ranges, and additionally erase any
-/// fragments that may overlap it.
-void LiveDebugValues::OpenRangesSet::erase(DebugVariable Var) {
+/// fragments that may overlap it. If the VarLoc is a backup location, erase
+/// the variable from the EntryValuesBackupVars set, indicating we should stop
+/// tracking its backup entry location. Otherwise, if the VarLoc is a primary
+/// location, erase the variable from the Vars set.
+void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) {
// Erasure helper.
- auto DoErase = [this](DebugVariable VarToErase) {
- auto It = Vars.find(VarToErase);
- if (It != Vars.end()) {
+ auto DoErase = [VL, this](DebugVariable VarToErase) {
+ auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
+ auto It = EraseFrom->find(VarToErase);
+ if (It != EraseFrom->end()) {
unsigned ID = It->second;
VarLocs.reset(ID);
- Vars.erase(It);
+ EraseFrom->erase(It);
}
};
+ DebugVariable Var = VL.Var;
+
// Erase the variable/fragment that ends here.
DoErase(Var);
// Extract the fragment. Interpret an empty fragment as one that covers all
// possible bits.
- FragmentInfo ThisFragment = Var.getFragmentDefault();
+ FragmentInfo ThisFragment = Var.getFragmentOrDefault();
// There may be fragments that overlap the designated fragment. Look them up
// in the pre-computed overlap map, and erase them too.
- auto MapIt = OverlappingFragments.find({Var.getVar(), ThisFragment});
+ auto MapIt = OverlappingFragments.find({Var.getVariable(), ThisFragment});
if (MapIt != OverlappingFragments.end()) {
for (auto Fragment : MapIt->second) {
LiveDebugValues::OptFragmentInfo FragmentHolder;
- if (!DebugVariable::isFragmentDefault(Fragment))
+ if (!DebugVariable::isDefaultFragment(Fragment))
FragmentHolder = LiveDebugValues::OptFragmentInfo(Fragment);
- DoErase({Var.getVar(), FragmentHolder, Var.getInlinedAt()});
+ DoErase({Var.getVariable(), FragmentHolder, Var.getInlinedAt()});
}
}
}
+void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet,
+ const VarLocMap &VarLocIDs) {
+ VarLocs.intersectWithComplement(KillSet);
+ for (unsigned ID : KillSet) {
+ const VarLoc *VL = &VarLocIDs[ID];
+ auto *EraseFrom = VL->isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
+ EraseFrom->erase(VL->Var);
+ }
+}
+
+void LiveDebugValues::OpenRangesSet::insert(unsigned VarLocID,
+ const VarLoc &VL) {
+ auto *InsertInto = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
+ VarLocs.set(VarLocID);
+ InsertInto->insert({VL.Var, VarLocID});
+}
+
+/// Return the Loc ID of an entry value backup location, if it exists for the
+/// variable.
+llvm::Optional<unsigned>
+LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) {
+ auto It = EntryValuesBackupVars.find(Var);
+ if (It != EntryValuesBackupVars.end())
+ return It->second;
+
+ return llvm::None;
+}
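
The Optional return value pushes the existence check to the callers, which all
follow the same look-up-then-act shape (see transferDebugValue and
emitEntryValues below). A simplified stand-in using the standard library:

    #include <map>
    #include <optional>

    // int stands in for DebugVariable; unsigned is the VarLoc ID.
    std::optional<unsigned> getBackup(const std::map<int, unsigned> &BackupVars,
                                      int Var) {
      auto It = BackupVars.find(Var);
      if (It != BackupVars.end())
        return It->second;
      return std::nullopt;
    }
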
+
//===----------------------------------------------------------------------===//
// Debug Range Extension Implementation
//===----------------------------------------------------------------------===//
@@ -642,7 +691,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
Out << "MBB: " << BB.getNumber() << ":\n";
for (unsigned VLL : L) {
const VarLoc &VL = VarLocIDs[VLL];
- Out << " Var: " << VL.Var.getVar()->getName();
+ Out << " Var: " << VL.Var.getVariable()->getName();
Out << " MI: ";
VL.dump(TRI, Out);
}
@@ -666,6 +715,62 @@ LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
return {Reg, Offset};
}
+/// Try to salvage the debug entry value when we encounter a new debug value
+/// describing the same parameter; otherwise stop tracking the value. Return
+/// true if we should stop tracking the entry value, and false otherwise.
+bool LiveDebugValues::removeEntryValue(const MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ const VarLoc &EntryVL) {
+ // Skip the DBG_VALUE which is the debug entry value itself.
+ if (MI.isIdenticalTo(EntryVL.MI))
+ return false;
+
+ // If the parameter's location is not a register location, we cannot track
+ // the entry value any more. In addition, if the debug expression from the
+ // DBG_VALUE is not empty, we can assume the parameter's value has changed,
+ // indicating that we should stop tracking its entry value as well.
+ if (!MI.getOperand(0).isReg() ||
+ MI.getDebugExpression()->getNumElements() != 0)
+ return true;
+
+ // If the DBG_VALUE comes from a copy instruction that copies the entry value,
+ // it means the parameter's value has not changed and we should be able to use
+ // its entry value.
+ bool TrySalvageEntryValue = false;
+ Register Reg = MI.getOperand(0).getReg();
+ auto I = std::next(MI.getReverseIterator());
+ const MachineOperand *SrcRegOp, *DestRegOp;
+ if (I != MI.getParent()->rend()) {
+ // TODO: Try to keep tracking an entry value if we encounter a propagated
+ // DBG_VALUE describing the copy of the entry value. (A propagated entry
+ // value does not indicate a modification of the parameter.)
+ auto DestSrc = TII->isCopyInstr(*I);
+ if (!DestSrc)
+ return true;
+
+ SrcRegOp = DestSrc->Source;
+ DestRegOp = DestSrc->Destination;
+ if (Reg != DestRegOp->getReg())
+ return true;
+ TrySalvageEntryValue = true;
+ }
+
+ if (TrySalvageEntryValue) {
+ for (unsigned ID : OpenRanges.getVarLocs()) {
+ const VarLoc &VL = VarLocIDs[ID];
+ if (!VL.isEntryBackupLoc())
+ continue;
+
+ if (VL.getEntryValueCopyBackupReg() == Reg &&
+ VL.MI.getOperand(0).getReg() == SrcRegOp->getReg())
+ return false;
+ }
+ }
+
+ return true;
+}
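
Reduced to its control flow, the function makes a three-way decision: keep
tracking when the DBG_VALUE is the entry value itself, stop when the location
or expression indicates a modification, and salvage when the preceding
instruction is a copy whose destination the DBG_VALUE describes. A sketch with
booleans standing in for the checks on the real instruction stream:

    bool shouldStopTrackingEntryValue(bool IsEntryDbgValueItself,
                                      bool IsRegisterLocation,
                                      bool HasEmptyExpression,
                                      bool PrecededByCopyOfEntryValue) {
      if (IsEntryDbgValueItself)
        return false; // skip the entry DBG_VALUE itself
      if (!IsRegisterLocation || !HasEmptyExpression)
        return true;  // the value changed; drop the backup
      if (PrecededByCopyOfEntryValue)
        return false; // salvaged through the copy backup
      return true;
    }
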
+
/// End all previous ranges related to @MI and start a new range from @MI
/// if it is a DBG_VALUE instr.
void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
@@ -680,18 +785,33 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
"Expected inlined-at fields to agree");
- // End all previous ranges of Var.
DebugVariable V(Var, Expr, InlinedAt);
- OpenRanges.erase(V);
- // Add the VarLoc to OpenRanges from this DBG_VALUE.
+ // Check if this DBG_VALUE indicates a parameter's value changing.
+ // If that is the case, we should stop tracking its entry value.
+ auto EntryValBackupID = OpenRanges.getEntryValueBackup(V);
+ if (Var->isParameter() && EntryValBackupID) {
+ const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID];
+ if (removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL)) {
+ LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: ";
+ MI.print(dbgs(), /*IsStandalone*/ false,
+ /*SkipOpers*/ false, /*SkipDebugLoc*/ false,
+ /*AddNewLine*/ true, TII));
+ OpenRanges.erase(EntryVL);
+ }
+ }
+
unsigned ID;
if (isDbgValueDescribedByReg(MI) || MI.getOperand(0).isImm() ||
MI.getOperand(0).isFPImm() || MI.getOperand(0).isCImm()) {
// Use normal VarLoc constructor for registers and immediates.
VarLoc VL(MI, LS);
+ // End all previous ranges of VL.Var.
+ OpenRanges.erase(VL);
+
ID = VarLocIDs.insert(VL);
- OpenRanges.insert(ID, VL.Var);
+ // Add the VarLoc to OpenRanges from this DBG_VALUE.
+ OpenRanges.insert(ID, VL);
} else if (MI.hasOneMemOperand()) {
llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?");
} else {
@@ -701,32 +821,30 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
}
}
+/// Turn the entry value backup locations into primary locations.
void LiveDebugValues::emitEntryValues(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
TransferMap &Transfers,
- DebugParamMap &DebugEntryVals,
SparseBitVector<> &KillSet) {
for (unsigned ID : KillSet) {
- if (!VarLocIDs[ID].Var.getVar()->isParameter())
+ if (!VarLocIDs[ID].Var.getVariable()->isParameter())
continue;
- const MachineInstr *CurrDebugInstr = &VarLocIDs[ID].MI;
+ auto DebugVar = VarLocIDs[ID].Var;
+ auto EntryValBackupID = OpenRanges.getEntryValueBackup(DebugVar);
- // If parameter's DBG_VALUE is not in the map that means we can't
- // generate parameter's entry value.
- if (!DebugEntryVals.count(CurrDebugInstr->getDebugVariable()))
+ // If the parameter has an entry value backup, we should be able to
+ // use its entry value.
+ if (!EntryValBackupID)
continue;
- auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()];
- DIExpression *NewExpr = DIExpression::prepend(
- ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue);
-
- VarLoc EntryLoc = VarLoc::CreateEntryLoc(*ParamDebugInstr, LS, NewExpr);
-
- unsigned EntryValLocID = VarLocIDs.insert(EntryLoc);
- Transfers.push_back({&MI, EntryValLocID});
- OpenRanges.insert(EntryValLocID, EntryLoc.Var);
+ const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID];
+ VarLoc EntryLoc =
+ VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr, EntryVL.Loc.RegNo);
+ unsigned EntryValueID = VarLocIDs.insert(EntryLoc);
+ Transfers.push_back({&MI, EntryValueID});
+ OpenRanges.insert(EntryValueID, EntryLoc);
}
}
@@ -741,23 +859,21 @@ void LiveDebugValues::insertTransferDebugPair(
unsigned NewReg) {
const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI;
- auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr,
- &VarLocIDs](VarLoc &VL) {
+ auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &VarLocIDs](VarLoc &VL) {
unsigned LocId = VarLocIDs.insert(VL);
// Close this variable's previous location range.
- DebugVariable V(*DebugInstr);
- OpenRanges.erase(V);
+ OpenRanges.erase(VL);
// Record the new location as an open range, and a postponed transfer
// inserting a DBG_VALUE for this location.
- OpenRanges.insert(LocId, VL.Var);
+ OpenRanges.insert(LocId, VL);
TransferDebugPair MIP = {&MI, LocId};
Transfers.push_back(MIP);
};
- // End all previous ranges of Var.
- OpenRanges.erase(VarLocIDs[OldVarID].Var);
+ // End all previous ranges of VL.Var.
+ OpenRanges.erase(VarLocIDs[OldVarID]);
switch (Kind) {
case TransferKind::TransferCopy: {
assert(NewReg &&
@@ -788,8 +904,6 @@ void LiveDebugValues::insertTransferDebugPair(
case TransferKind::TransferRestore: {
assert(NewReg &&
"No register supplied when handling a restore of a debug value");
- MachineFunction *MF = MI.getMF();
- DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent());
// DebugInstr refers to the pre-spill location, therefore we can reuse
// its expression.
VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg);
@@ -807,7 +921,7 @@ void LiveDebugValues::insertTransferDebugPair(
/// A definition of a register may mark the end of a range.
void LiveDebugValues::transferRegisterDef(
MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
- TransferMap &Transfers, DebugParamMap &DebugEntryVals) {
+ TransferMap &Transfers) {
MachineFunction *MF = MI.getMF();
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
@@ -841,8 +955,7 @@ void LiveDebugValues::transferRegisterDef(
if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
auto &TM = TPC->getTM<TargetMachine>();
if (TM.Options.EnableDebugEntryValues)
- emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals,
- KillSet);
+ emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet);
}
}
@@ -980,12 +1093,12 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
if (TKind == TransferKind::TransferSpill &&
VarLocIDs[ID].isDescribedByReg() == Reg) {
LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
- << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+ << VarLocIDs[ID].Var.getVariable()->getName() << ")\n");
} else if (TKind == TransferKind::TransferRestore &&
VarLocIDs[ID].Kind == VarLoc::SpillLocKind &&
VarLocIDs[ID].Loc.SpillLocation == *Loc) {
LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
- << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+ << VarLocIDs[ID].Var.getVariable()->getName() << ")\n");
} else
continue;
insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, TKind,
@@ -1001,13 +1114,17 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
TransferMap &Transfers) {
- const MachineOperand *SrcRegOp, *DestRegOp;
+ auto DestSrc = TII->isCopyInstr(MI);
+ if (!DestSrc)
+ return;
+
+ const MachineOperand *DestRegOp = DestSrc->Destination;
+ const MachineOperand *SrcRegOp = DestSrc->Source;
- if (!TII->isCopyInstr(MI, SrcRegOp, DestRegOp) || !SrcRegOp->isKill() ||
- !DestRegOp->isDef())
+ if (!DestRegOp->isDef())
return;
- auto isCalleSavedReg = [&](unsigned Reg) {
+ auto isCalleeSavedReg = [&](unsigned Reg) {
for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
if (CalleeSavedRegs.test(*RAI))
return true;
@@ -1022,7 +1139,31 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
// included, there would be a great chance that it is going to be clobbered
// soon. It is more likely that previous register location, which is callee
// saved, is going to stay unclobbered longer, even if it is killed.
- if (!isCalleSavedReg(DestReg))
+ if (!isCalleeSavedReg(DestReg))
+ return;
+
+ // Remember an entry value movement. If we encounter a new debug value of
+ // a parameter describing only a move of the value, rather than a
+ // modification, we are still able to use the entry value if needed.
+ if (isRegOtherThanSPAndFP(*DestRegOp, MI, TRI)) {
+ for (unsigned ID : OpenRanges.getVarLocs()) {
+ if (VarLocIDs[ID].getEntryValueBackupReg() == SrcReg) {
+ LLVM_DEBUG(dbgs() << "Copy of the entry value: "; MI.dump(););
+ VarLoc EntryValLocCopyBackup = VarLoc::CreateEntryCopyBackupLoc(
+ VarLocIDs[ID].MI, LS, VarLocIDs[ID].Expr, DestReg);
+
+ // Stop tracking the original entry value.
+ OpenRanges.erase(VarLocIDs[ID]);
+
+ // Start tracking the entry value copy.
+ unsigned EntryValCopyLocID = VarLocIDs.insert(EntryValLocCopyBackup);
+ OpenRanges.insert(EntryValCopyLocID, EntryValLocCopyBackup);
+ break;
+ }
+ }
+ }
+
+ if (!SrcRegOp->isKill())
return;
for (unsigned ID : OpenRanges.getVarLocs()) {
@@ -1070,26 +1211,27 @@ bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB,
void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
VarToFragments &SeenFragments,
OverlapMap &OverlappingFragments) {
- DebugVariable MIVar(MI);
- FragmentInfo ThisFragment = MIVar.getFragmentDefault();
+ DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ FragmentInfo ThisFragment = MIVar.getFragmentOrDefault();
// If this is the first sighting of this variable, then we are guaranteed
// there are currently no overlapping fragments either. Initialize the set
// of seen fragments, record no overlaps for the current one, and return.
- auto SeenIt = SeenFragments.find(MIVar.getVar());
+ auto SeenIt = SeenFragments.find(MIVar.getVariable());
if (SeenIt == SeenFragments.end()) {
SmallSet<FragmentInfo, 4> OneFragment;
OneFragment.insert(ThisFragment);
- SeenFragments.insert({MIVar.getVar(), OneFragment});
+ SeenFragments.insert({MIVar.getVariable(), OneFragment});
- OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}});
+ OverlappingFragments.insert({{MIVar.getVariable(), ThisFragment}, {}});
return;
}
// If this particular Variable/Fragment pair already exists in the overlap
// map, it has already been accounted for.
auto IsInOLapMap =
- OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}});
+ OverlappingFragments.insert({{MIVar.getVariable(), ThisFragment}, {}});
if (!IsInOLapMap.second)
return;
@@ -1107,7 +1249,7 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
// Mark the previously seen fragment as being overlapped by the current
// one.
auto ASeenFragmentsOverlaps =
- OverlappingFragments.find({MIVar.getVar(), ASeenFragment});
+ OverlappingFragments.find({MIVar.getVariable(), ASeenFragment});
assert(ASeenFragmentsOverlaps != OverlappingFragments.end() &&
"Previously seen var fragment has no vector of overlaps");
ASeenFragmentsOverlaps->second.push_back(ThisFragment);
@@ -1117,16 +1259,11 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
AllSeenFragments.insert(ThisFragment);
}
-/// This routine creates OpenRanges and OutLocs.
+/// This routine creates OpenRanges.
void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
- TransferMap &Transfers,
- DebugParamMap &DebugEntryVals,
- OverlapMap &OverlapFragments,
- VarToFragments &SeenFragments) {
+ VarLocMap &VarLocIDs, TransferMap &Transfers) {
transferDebugValue(MI, OpenRanges, VarLocIDs);
- transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers,
- DebugEntryVals);
+ transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers);
transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
}
@@ -1175,7 +1312,7 @@ bool LiveDebugValues::join(
if (!InLocsT.empty()) {
for (auto ID : InLocsT)
dbgs() << " gathered candidate incoming var: "
- << VarLocIDs[ID].Var.getVar()->getName() << "\n";
+ << VarLocIDs[ID].Var.getVariable()->getName() << "\n";
}
});
@@ -1190,7 +1327,7 @@ bool LiveDebugValues::join(
if (!VarLocIDs[ID].dominates(MBB)) {
KillSet.set(ID);
LLVM_DEBUG({
- auto Name = VarLocIDs[ID].Var.getVar()->getName();
+ auto Name = VarLocIDs[ID].Var.getVariable()->getName();
dbgs() << " killing " << Name << ", it doesn't dominate MBB\n";
});
}
@@ -1247,6 +1384,8 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs,
// The ID location is live-in to MBB -- work out what kind of machine
// location it is and create a DBG_VALUE.
const VarLoc &DiffIt = VarLocIDs[ID];
+ if (DiffIt.isEntryBackupLoc())
+ continue;
MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent());
MBB.insert(MBB.instr_begin(), MI);
@@ -1256,6 +1395,87 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs,
}
}
+bool LiveDebugValues::isEntryValueCandidate(
+ const MachineInstr &MI, const DefinedRegsSet &DefinedRegs) const {
+ assert(MI.isDebugValue() && "This must be DBG_VALUE.");
+
+ // TODO: Add support for local variables that are expressed in terms of
+ // parameters entry values.
+ // TODO: Add support for modified arguments that can be expressed
+ // by using its entry value.
+ auto *DIVar = MI.getDebugVariable();
+ if (!DIVar->isParameter())
+ return false;
+
+ // Do not consider parameters that belong to an inlined function.
+ if (MI.getDebugLoc()->getInlinedAt())
+ return false;
+
+ // Do not consider indirect debug values (TODO: explain why).
+ if (MI.isIndirectDebugValue())
+ return false;
+
+ // Only consider parameters that are described using registers. Parameters
+ // that are passed on the stack are not yet supported, so ignore debug
+ // values that are described by the frame or stack pointer.
+ if (!isRegOtherThanSPAndFP(MI.getOperand(0), MI, TRI))
+ return false;
+
+ // If a parameter's value has been propagated from the caller, then the
+ // parameter's DBG_VALUE may be described using a register defined by some
+ // instruction in the entry block, in which case we shouldn't create an
+ // entry value.
+ if (DefinedRegs.count(MI.getOperand(0).getReg()))
+ return false;
+
+ // TODO: Add support for parameters that have a pre-existing debug expression
+ // (e.g. fragments, or indirect parameters using DW_OP_deref).
+ if (MI.getDebugExpression()->getNumElements() > 0)
+ return false;
+
+ return true;
+}
+
+/// Collect all register defines (including aliases) for the given instruction.
+static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs,
+ const TargetRegisterInfo *TRI) {
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isReg() && MO.isDef() && MO.getReg())
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Regs.insert(*AI);
+}
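
The helper widens each def through the alias iterator so that defs of sub- and
super-registers are caught as well. A standalone analogue in which a made-up
alias table plays the role of MCRegAliasIterator:

    #include <set>
    #include <vector>

    using Reg = unsigned;

    // AliasTable[R] lists the aliases of R, including R itself, much like
    // MCRegAliasIterator with IncludeSelf set to true.
    void collectDefs(const std::vector<Reg> &Defs,
                     const std::vector<std::vector<Reg>> &AliasTable,
                     std::set<Reg> &Out) {
      for (Reg R : Defs)
        for (Reg A : AliasTable[R])
          Out.insert(A);
    }
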
+
+/// This routine records the entry values of function parameters. The values
+/// can be used as backup values. If we lose track of some unmodified
+/// parameters, the backup values will be used as their primary locations.
+void LiveDebugValues::recordEntryValue(const MachineInstr &MI,
+ const DefinedRegsSet &DefinedRegs,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs) {
+ if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+ auto &TM = TPC->getTM<TargetMachine>();
+ if (!TM.Options.EnableDebugEntryValues)
+ return;
+ }
+
+ DebugVariable V(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+
+ if (!isEntryValueCandidate(MI, DefinedRegs) ||
+ OpenRanges.getEntryValueBackup(V))
+ return;
+
+ LLVM_DEBUG(dbgs() << "Creating the backup entry location: "; MI.dump(););
+
+ // Create the entry value and use it as a backup location. The backup
+ // remains valid until the parameter's value is modified.
+ DIExpression *NewExpr =
+ DIExpression::prepend(MI.getDebugExpression(), DIExpression::EntryValue);
+ VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, LS, NewExpr);
+ unsigned EntryValLocID = VarLocIDs.insert(EntryValLocAsBackup);
+ OpenRanges.insert(EntryValLocID, EntryValLocAsBackup);
+}
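
The DIExpression::prepend call wraps the parameter's (empty, per
isEntryValueCandidate) expression in an entry-value operation, telling the
consumer to evaluate it against the value the register held on function entry.
A toy model of that rewrite; the opcode value is an illustrative placeholder,
not the real DWARF encoding:

    #include <vector>

    enum Op : unsigned { EntryValueMarker = 0xa0 /* placeholder opcode */ };

    std::vector<unsigned> prependEntryValue(std::vector<unsigned> Expr) {
      // The marker is followed by the number of operations that belong to
      // the entry value; a single register operation is modeled as 1 here.
      Expr.insert(Expr.begin(), {EntryValueMarker, 1u});
      return Expr;
    }
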
+
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
@@ -1266,12 +1486,13 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
bool MBBJoined = false;
VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
- OverlapMap OverlapFragments; // Map of overlapping variable fragments
+ OverlapMap OverlapFragments; // Map of overlapping variable fragments.
OpenRangesSet OpenRanges(OverlapFragments);
// Ranges that are open until end of bb.
VarLocInMBB OutLocs; // Ranges that exist beyond bb.
VarLocInMBB InLocs; // Ranges that are incoming after joining.
- TransferMap Transfers; // DBG_VALUEs associated with spills.
+ TransferMap Transfers; // DBG_VALUEs associated with transfers (such as
+ // spills, copies and restores).
VarLocInMBB PendingInLocs; // Ranges that are incoming after joining, but
// that we have deferred creating DBG_VALUE insts
// for immediately.
@@ -1291,42 +1512,18 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
std::greater<unsigned int>>
Pending;
- // Besides parameter's modification, check whether a DBG_VALUE is inlined
- // in order to deduce whether the variable that it tracks comes from
- // a different function. If that is the case we can't track its entry value.
- auto IsUnmodifiedFuncParam = [&](const MachineInstr &MI) {
- auto *DIVar = MI.getDebugVariable();
- return DIVar->isParameter() && DIVar->isNotModified() &&
- !MI.getDebugLoc()->getInlinedAt();
- };
-
- const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
- unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
- Register FP = TRI->getFrameRegister(MF);
- auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool {
- return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP;
- };
-
- // Working set of currently collected debug variables mapped to DBG_VALUEs
- // representing candidates for production of debug entry values.
- DebugParamMap DebugEntryVals;
+ // Set of register defines that are seen when traversing the entry block
+ // looking for debug entry value candidates.
+ DefinedRegsSet DefinedRegs;
- MachineBasicBlock &First_MBB = *(MF.begin());
// Only in the case of entry MBB collect DBG_VALUEs representing
// function parameters in order to generate debug entry values for them.
- // Currently, we generate debug entry values only for parameters that are
- // unmodified throughout the function and located in a register.
- // TODO: Add support for parameters that are described as fragments.
- // TODO: Add support for modified arguments that can be expressed
- // by using its entry value.
- // TODO: Add support for local variables that are expressed in terms of
- // parameters entry values.
- for (auto &MI : First_MBB)
- if (MI.isDebugValue() && IsUnmodifiedFuncParam(MI) &&
- !MI.isIndirectDebugValue() && IsRegOtherThanSPAndFP(MI.getOperand(0)) &&
- !DebugEntryVals.count(MI.getDebugVariable()) &&
- !MI.getDebugExpression()->isFragment())
- DebugEntryVals[MI.getDebugVariable()] = &MI;
+ MachineBasicBlock &First_MBB = *(MF.begin());
+ for (auto &MI : First_MBB) {
+ collectRegDefs(MI, DefinedRegs, TRI);
+ if (MI.isDebugValue())
+ recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs);
+ }
// Initialize per-block structures and scan for fragment overlaps.
for (auto &MBB : MF) {
@@ -1379,13 +1576,12 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
MBBJoined = false;
Changed = true;
// Now that we have started to extend ranges across BBs we need to
- // examine spill instructions to see whether they spill registers that
- // correspond to user variables.
+ // examine spill, copy and restore instructions to see whether they
+ // operate with registers that correspond to user variables.
// First load any pending inlocs.
OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs);
for (auto &MI : *MBB)
- process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
- DebugEntryVals, OverlapFragments, SeenFragments);
+ process(MI, OpenRanges, VarLocIDs, Transfers);
OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs);
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
@@ -1439,8 +1635,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
TFI = MF.getSubtarget().getFrameLowering();
- TFI->determineCalleeSaves(MF, CalleeSavedRegs,
- std::make_unique<RegScavenger>().get());
+ TFI->getCalleeSaves(MF, CalleeSavedRegs);
LS.initialize(MF);
bool Changed = ExtendRanges(MF);
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 2dd462fc72b3..2cc547a6b741 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -49,6 +49,7 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -142,51 +143,22 @@ namespace {
class LDVImpl;
-/// A UserValue is uniquely identified by the source variable it refers to
-/// (Variable), the expression describing how to get the value (Expression) and
-/// the specific usage (InlinedAt). InlinedAt differentiates both between
-/// inline and non-inline functions, and multiple inlined instances in the same
-/// scope. FIXME: The only part of the Expression which matters for UserValue
-/// identification is the fragment part.
-class UserValueIdentity {
-private:
- /// The debug info variable we are part of.
- const DILocalVariable *Variable;
- /// Any complex address expression.
- const DIExpression *Expression;
- /// Function usage identification.
- const DILocation *InlinedAt;
-
-public:
- UserValueIdentity(const DILocalVariable *Var, const DIExpression *Expr,
- const DILocation *IA)
- : Variable(Var), Expression(Expr), InlinedAt(IA) {}
-
- bool match(const DILocalVariable *Var, const DIExpression *Expr,
- const DILocation *IA) const {
- // FIXME: The fragment should be part of the identity, but not
- // other things in the expression like stack values.
- return Var == Variable && Expr == Expression && IA == InlinedAt;
- }
-
- bool match(const UserValueIdentity &Other) const {
- return match(Other.Variable, Other.Expression, Other.InlinedAt);
- }
-
- unsigned hash_value() const {
- return hash_combine(Variable, Expression, InlinedAt);
- }
-};
-
/// A user value is a part of a debug info user variable.
///
/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they refer to the same variable, or if they are
+/// held by the same virtual register. The equivalence class is the transitive
+/// closure of that relation.
class UserValue {
const DILocalVariable *Variable; ///< The debug info variable we are part of.
const DIExpression *Expression; ///< Any complex address expression.
DebugLoc dl; ///< The debug location for the variable. This is
///< used by dwarf writer to find lexical scope.
+ UserValue *leader; ///< Equivalence class leader.
+ UserValue *next = nullptr; ///< Next value in equivalence class, or null.
/// Numbered locations referenced by locmap.
SmallVector<MachineOperand, 4> locations;
@@ -207,15 +179,49 @@ class UserValue {
LiveIntervals &LIS);
public:
- UserValue(const UserValue &) = delete;
-
/// Create a new UserValue.
UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L,
LocMap::Allocator &alloc)
- : Variable(var), Expression(expr), dl(std::move(L)), locInts(alloc) {}
+ : Variable(var), Expression(expr), dl(std::move(L)), leader(this),
+ locInts(alloc) {}
+
+ /// Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+ /// Does this UserValue match the parameters?
+ bool match(const DILocalVariable *Var, const DIExpression *Expr,
+ const DILocation *IA) const {
+ // FIXME: The fragment should be part of the equivalence class, but not
+ // other things in the expression like stack values.
+ return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA;
+ }
- UserValueIdentity getId() {
- return UserValueIdentity(Variable, Expression, dl->getInlinedAt());
+ /// Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next) {
+ End->leader = L1;
+ End = End->next;
+ }
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
}
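
The leader/next pointers implement a small union-find over intrusive
singly-linked lists: getLeader chases and compresses leader pointers, and
merge splices one class's member list into the other. The same shape as a
standalone sketch:

    struct Node {
      Node *leader = this;  // path-compressed leader pointer
      Node *next = nullptr; // next member of the equivalence class
    };

    Node *getLeader(Node *N) {
      Node *L = N->leader;
      while (L != L->leader)
        L = L->leader;
      return N->leader = L; // compress the path for next time
    }

    Node *merge(Node *A, Node *B) {
      B = getLeader(B);
      if (!A)
        return B;
      A = getLeader(A);
      if (A == B)
        return A;
      // Splice B's members in front of A's existing members.
      Node *End = B;
      while (End->next) {
        End->leader = A;
        End = End->next;
      }
      End->leader = A;
      End->next = A->next;
      A->next = B;
      return A;
    }
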
/// Return the location number that matches Loc.
@@ -250,6 +256,25 @@ public:
return locations.size() - 1;
}
+ /// Remove (recycle) a location number. If \p LocNo is still used by
+ /// locInts, nothing is done.
+ void removeLocationIfUnused(unsigned LocNo) {
+ // Bail out if LocNo is still used.
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ DbgValueLocation Loc = I.value();
+ if (Loc.locNo() == LocNo)
+ return;
+ }
+ // Remove the entry in the locations vector, and adjust all references to
+ // location numbers above the removed entry.
+ locations.erase(locations.begin() + LocNo);
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ DbgValueLocation Loc = I.value();
+ if (!Loc.isUndef() && Loc.locNo() > LocNo)
+ I.setValueUnchecked(Loc.changeLocNo(Loc.locNo() - 1));
+ }
+ }
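
The renumbering matters because location numbers are vector indices: erasing
entry LocNo shifts all later entries down by one, so every stored reference
above LocNo must be decremented in lock-step. A standalone sketch of that
invariant:

    #include <vector>

    void eraseAndRenumber(std::vector<int> &Locations,
                          std::vector<unsigned> &Refs, unsigned LocNo) {
      Locations.erase(Locations.begin() + LocNo);
      for (unsigned &R : Refs)
        if (R > LocNo)
          --R; // references past the erased slot slide down by one
    }
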
+
/// Ensure that all virtual register locations are mapped.
void mapVirtRegs(LDVImpl *LDV);
@@ -327,29 +352,7 @@ public:
void print(raw_ostream &, const TargetRegisterInfo *);
};
-} // namespace
-namespace llvm {
-template <> struct DenseMapInfo<UserValueIdentity> {
- static UserValueIdentity getEmptyKey() {
- auto Key = DenseMapInfo<DILocalVariable *>::getEmptyKey();
- return UserValueIdentity(Key, nullptr, nullptr);
- }
- static UserValueIdentity getTombstoneKey() {
- auto Key = DenseMapInfo<DILocalVariable *>::getTombstoneKey();
- return UserValueIdentity(Key, nullptr, nullptr);
- }
- static unsigned getHashValue(const UserValueIdentity &Val) {
- return Val.hash_value();
- }
- static bool isEqual(const UserValueIdentity &LHS,
- const UserValueIdentity &RHS) {
- return LHS.match(RHS);
- }
-};
-} // namespace llvm
-
-namespace {
/// A user label is a part of a debug info user label.
class UserLabel {
const DILabel *Label; ///< The debug info label we are part of.
@@ -401,20 +404,20 @@ class LDVImpl {
/// All allocated UserLabel instances.
SmallVector<std::unique_ptr<UserLabel>, 2> userLabels;
- /// Map virtual register to UserValues which use it.
- using VRMap = DenseMap<unsigned, SmallVector<UserValue *, 4>>;
- VRMap VirtRegToUserVals;
+ /// Map virtual register to eq class leader.
+ using VRMap = DenseMap<unsigned, UserValue *>;
+ VRMap virtRegToEqClass;
- /// Map unique UserValue identity to UserValue.
- using UVMap = DenseMap<UserValueIdentity, UserValue *>;
- UVMap UserVarMap;
+ /// Map user variable to eq class leader.
+ using UVMap = DenseMap<const DILocalVariable *, UserValue *>;
+ UVMap userVarMap;
/// Find or create a UserValue.
UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr,
const DebugLoc &DL);
- /// Find the UserValues for VirtReg or null.
- SmallVectorImpl<UserValue *> *lookupVirtReg(unsigned VirtReg);
+ /// Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(unsigned VirtReg);
/// Add DBG_VALUE instruction to our maps.
///
@@ -454,8 +457,8 @@ public:
MF = nullptr;
userValues.clear();
userLabels.clear();
- VirtRegToUserVals.clear();
- UserVarMap.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
// Make sure we call emitDebugValues if the machine function was modified.
assert((!ModifiedMF || EmitDone) &&
"Dbg values are not emitted in LDV");
@@ -463,8 +466,8 @@ public:
ModifiedMF = false;
}
- /// Map virtual register to a UserValue.
- void mapVirtReg(unsigned VirtReg, UserValue *UV);
+ /// Map virtual register to an equivalence class.
+ void mapVirtReg(unsigned VirtReg, UserValue *EC);
/// Replace all references to OldReg with NewRegs.
void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs);
@@ -503,7 +506,7 @@ static void printExtendedName(raw_ostream &OS, const DINode *Node,
const DILocation *DL) {
const LLVMContext &Ctx = Node->getContext();
StringRef Res;
- unsigned Line;
+ unsigned Line = 0;
if (const auto *V = dyn_cast<const DILocalVariable>(Node)) {
Res = V->getName();
Line = V->getLine();
@@ -572,27 +575,31 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) {
UserValue *LDVImpl::getUserValue(const DILocalVariable *Var,
const DIExpression *Expr, const DebugLoc &DL) {
- auto Ident = UserValueIdentity(Var, Expr, DL->getInlinedAt());
- UserValue *&UVEntry = UserVarMap[Ident];
-
- if (UVEntry)
- return UVEntry;
+ UserValue *&Leader = userVarMap[Var];
+ if (Leader) {
+ UserValue *UV = Leader->getLeader();
+ Leader = UV;
+ for (; UV; UV = UV->getNext())
+ if (UV->match(Var, Expr, DL->getInlinedAt()))
+ return UV;
+ }
- userValues.push_back(std::make_unique<UserValue>(Var, Expr, DL, allocator));
- return UVEntry = userValues.back().get();
+ userValues.push_back(
+ std::make_unique<UserValue>(Var, Expr, DL, allocator));
+ UserValue *UV = userValues.back().get();
+ Leader = UserValue::merge(Leader, UV);
+ return UV;
}
-void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *UV) {
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs");
- assert(UserVarMap.find(UV->getId()) != UserVarMap.end() &&
- "UserValue should exist in UserVarMap");
- VirtRegToUserVals[VirtReg].push_back(UV);
+ UserValue *&Leader = virtRegToEqClass[VirtReg];
+ Leader = UserValue::merge(Leader, EC);
}
-SmallVectorImpl<UserValue *> *LDVImpl::lookupVirtReg(unsigned VirtReg) {
- VRMap::iterator Itr = VirtRegToUserVals.find(VirtReg);
- if (Itr != VirtRegToUserVals.end())
- return &Itr->getSecond();
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+ if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+ return UV->getLeader();
return nullptr;
}
@@ -1086,23 +1093,14 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
}
}
- // Finally, remove any remaining OldLocNo intervals and OldLocNo itself.
- locations.erase(locations.begin() + OldLocNo);
- LocMapI.goToBegin();
- while (LocMapI.valid()) {
- DbgValueLocation v = LocMapI.value();
- if (v.locNo() == OldLocNo) {
- LLVM_DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';'
- << LocMapI.stop() << ")\n");
- LocMapI.erase();
- } else {
- // Undef values always have location number UndefLocNo, so don't change
- // locNo in that case. See getLocationNo().
- if (!v.isUndef() && v.locNo() > OldLocNo)
- LocMapI.setValueUnchecked(v.changeLocNo(v.locNo() - 1));
- ++LocMapI;
- }
- }
+ // Finally, remove OldLocNo unless it is still used by some interval in the
+ // locInts map. One case where OldLocNo is still in use is when the register
+ // has been spilled. In such situations the spilled register is kept as a
+ // location until rewriteLocations is called (VirtRegMap maps the old
+ // register to the spill slot). So for a while we can have locations that map
+ // to virtual registers that have been removed from both the MachineFunction
+ // and from LiveIntervals.
+ removeLocationIfUnused(OldLocNo);
LLVM_DEBUG({
dbgs() << "Split result: \t";
@@ -1129,18 +1127,16 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) {
bool DidChange = false;
- if (auto *UserVals = lookupVirtReg(OldReg))
- for (auto *UV : *UserVals)
- DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
+ for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
+ DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
if (!DidChange)
return;
// Map all of the new virtual registers.
- if (auto *UserVals = lookupVirtReg(OldReg))
- for (auto *UV : *UserVals)
- for (unsigned i = 0; i != NewRegs.size(); ++i)
- mapVirtReg(NewRegs[i], UV);
+ UserValue *UV = lookupVirtReg(OldReg);
+ for (unsigned i = 0; i != NewRegs.size(); ++i)
+ mapVirtReg(NewRegs[i], UV);
}
void LiveDebugVariables::
diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp
index 54ac46f2e7ce..930dc116205a 100644
--- a/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/llvm/lib/CodeGen/LiveInterval.cpp
@@ -883,7 +883,8 @@ void LiveInterval::clearSubRanges() {
static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
LaneBitmask LaneMask,
const SlotIndexes &Indexes,
- const TargetRegisterInfo &TRI) {
+ const TargetRegisterInfo &TRI,
+ unsigned ComposeSubRegIdx) {
// Phys reg should not be tracked at subreg level.
// Same for noreg (Reg == 0).
if (!Register::isVirtualRegister(Reg) || !Reg)
@@ -905,7 +906,12 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
continue;
if (MOI->getReg() != Reg)
continue;
- if ((TRI.getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none())
+ LaneBitmask OrigMask = TRI.getSubRegIndexLaneMask(MOI->getSubReg());
+ LaneBitmask ExpectedDefMask =
+ ComposeSubRegIdx
+ ? TRI.composeSubRegIndexLaneMask(ComposeSubRegIdx, OrigMask)
+ : OrigMask;
+ if ((ExpectedDefMask & LaneMask).none())
continue;
hasDef = true;
break;
@@ -924,7 +930,8 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
void LiveInterval::refineSubRanges(
BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
std::function<void(LiveInterval::SubRange &)> Apply,
- const SlotIndexes &Indexes, const TargetRegisterInfo &TRI) {
+ const SlotIndexes &Indexes, const TargetRegisterInfo &TRI,
+ unsigned ComposeSubRegIdx) {
LaneBitmask ToApply = LaneMask;
for (SubRange &SR : subranges()) {
LaneBitmask SRMask = SR.LaneMask;
@@ -944,8 +951,10 @@ void LiveInterval::refineSubRanges(
MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
// Now that the subrange is split in half, make sure we
// only keep in the subranges the VNIs that touch the related half.
- stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI);
- stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI);
+ stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI,
+ ComposeSubRegIdx);
+ stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI,
+ ComposeSubRegIdx);
}
Apply(*MatchingRange);
ToApply &= ~Matching;
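
The new ComposeSubRegIdx parameter lets a def's lane mask, expressed relative
to a sub-register, be compared against a mask in the enclosing register's lane
numbering. An illustrative-only model (real lane-mask composition is
table-driven per target; a plain shift is merely a stand-in):

    #include <cstdint>

    using LaneMask = std::uint64_t;

    bool defTouchesRange(unsigned ComposeShift, LaneMask OrigMask,
                         LaneMask RangeMask) {
      // Remap the def's mask into the enclosing register's numbering first,
      // mirroring composeSubRegIndexLaneMask; 0 means "no composition".
      LaneMask Expected = ComposeShift ? (OrigMask << ComposeShift) : OrigMask;
      return (Expected & RangeMask) != 0;
    }
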
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 2989930ad093..9c80282bc59e 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -191,12 +191,12 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
}
/// Compute the live interval of a virtual register, based on defs and uses.
-void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
+bool LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg));
- computeDeadValues(LI, nullptr);
+ return computeDeadValues(LI, nullptr);
}
void LiveIntervals::computeVirtRegs() {
@@ -204,7 +204,12 @@ void LiveIntervals::computeVirtRegs() {
unsigned Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
- createAndComputeVirtRegInterval(Reg);
+ LiveInterval &LI = createEmptyInterval(Reg);
+ bool NeedSplit = computeVirtRegInterval(LI);
+ if (NeedSplit) {
+ SmallVector<LiveInterval*, 8> SplitLIs;
+ splitSeparateComponents(LI, SplitLIs);
+ }
}
}
@@ -500,6 +505,8 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
bool LiveIntervals::computeDeadValues(LiveInterval &LI,
SmallVectorImpl<MachineInstr*> *dead) {
bool MayHaveSplitComponents = false;
+ bool HaveDeadDef = false;
+
for (VNInfo *VNI : LI.valnos) {
if (VNI->isUnused())
continue;
@@ -530,6 +537,10 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
MachineInstr *MI = getInstructionFromIndex(Def);
assert(MI && "No instruction defining live value");
MI->addRegisterDead(LI.reg, TRI);
+ if (HaveDeadDef)
+ MayHaveSplitComponents = true;
+ HaveDeadDef = true;
+
if (dead && MI->allDefsAreDead()) {
LLVM_DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI);
dead->push_back(MI);
@@ -1061,9 +1072,9 @@ private:
// Kill flags shouldn't be used while live intervals exist, they will be
// reinserted by VirtRegRewriter.
if (MachineInstr *KillMI = LIS.getInstructionFromIndex(OldIdxIn->end))
- for (MIBundleOperands MO(*KillMI); MO.isValid(); ++MO)
- if (MO->isReg() && MO->isUse())
- MO->setIsKill(false);
+ for (MachineOperand &MOP : mi_bundle_ops(*KillMI))
+ if (MOP.isReg() && MOP.isUse())
+ MOP.setIsKill(false);
// Is there a def before NewIdx which is not OldIdx?
LiveRange::iterator Next = std::next(OldIdxIn);
diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp
index c2a1cc7c6490..7a5cffca3470 100644
--- a/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
@@ -42,28 +43,23 @@ void LivePhysRegs::removeRegsInMask(const MachineOperand &MO,
/// Remove defined registers and regmask kills from the set.
void LivePhysRegs::removeDefs(const MachineInstr &MI) {
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (O->isReg()) {
- if (!O->isDef() || O->isDebug())
- continue;
- Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
- continue;
- removeReg(Reg);
- } else if (O->isRegMask())
- removeRegsInMask(*O);
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ if (MOP.isRegMask()) {
+ removeRegsInMask(MOP);
+ continue;
+ }
+
+ if (MOP.isDef())
+ removeReg(MOP.getReg());
}
}
/// Add uses to the set.
void LivePhysRegs::addUses(const MachineInstr &MI) {
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (!O->isReg() || !O->readsReg() || O->isDebug())
- continue;
- Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ if (!MOP.isReg() || !MOP.readsReg())
continue;
- addReg(Reg);
+ addReg(MOP.getReg());
}
}
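
These loops and the ones in LiveRegUnits below now share a single filtered range, phys_regs_and_masks(MI), which yields only regmask operands and non-debug physical-register operands of the (possibly bundled) instruction. As a mental model, the filter is roughly the following predicate; this is a sketch, not the exact in-tree definition:

    static bool isPhysRegOrRegMask(const MachineOperand &MO) {
      if (MO.isRegMask())
        return true;
      return MO.isReg() && !MO.isDebug() &&
             Register::isPhysicalRegister(MO.getReg());
    }

which is exactly the set of checks the removed loops performed by hand.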
@@ -116,7 +112,7 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,
}
}
-/// Prin the currently live registers to OS.
+/// Print the currently live registers to OS.
void LivePhysRegs::print(raw_ostream &OS) const {
OS << "Live Registers:";
if (!TRI) {
diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp
index cbf112ee2bd5..2ebc8d7576d1 100644
--- a/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 72c79e5f8a75..08f046420fa1 100644
--- a/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp
index 97763def1f40..b2731aa0e7db 100644
--- a/llvm/lib/CodeGen/LiveRegUnits.cpp
+++ b/llvm/lib/CodeGen/LiveRegUnits.cpp
@@ -43,41 +43,34 @@ void LiveRegUnits::addRegsInMask(const uint32_t *RegMask) {
void LiveRegUnits::stepBackward(const MachineInstr &MI) {
// Remove defined registers and regmask kills from the set.
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (O->isReg()) {
- if (!O->isDef() || O->isDebug())
- continue;
- Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
- continue;
- removeReg(Reg);
- } else if (O->isRegMask())
- removeRegsNotPreserved(O->getRegMask());
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ if (MOP.isRegMask()) {
+ removeRegsNotPreserved(MOP.getRegMask());
+ continue;
+ }
+
+ if (MOP.isDef())
+ removeReg(MOP.getReg());
}
// Add uses to the set.
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (!O->isReg() || !O->readsReg() || O->isDebug())
- continue;
- Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ if (!MOP.isReg() || !MOP.readsReg())
continue;
- addReg(Reg);
+ addReg(MOP.getReg());
}
}
void LiveRegUnits::accumulate(const MachineInstr &MI) {
// Add defs, uses and regmask clobbers to the set.
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (O->isReg()) {
- Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
- continue;
- if (!O->isDef() && !O->readsReg())
- continue;
- addReg(Reg);
- } else if (O->isRegMask())
- addRegsInMask(O->getRegMask());
+ for (const MachineOperand &MOP : phys_regs_and_masks(MI)) {
+ if (MOP.isRegMask()) {
+ addRegsInMask(MOP.getRegMask());
+ continue;
+ }
+ if (!MOP.isDef() && !MOP.readsReg())
+ continue;
+ addReg(MOP.getReg());
}
}
diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 2392d4d00b56..5022726dc70a 100644
--- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp
index ca0daa14fedf..40dfa696a2b9 100644
--- a/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/llvm/lib/CodeGen/LowLevelType.cpp
@@ -24,14 +24,37 @@ LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
if (NumElements == 1)
return ScalarTy;
return LLT::vector(NumElements, ScalarTy);
- } else if (auto PTy = dyn_cast<PointerType>(&Ty)) {
- return LLT::pointer(PTy->getAddressSpace(), DL.getTypeSizeInBits(&Ty));
- } else if (Ty.isSized()) {
+ }
+
+ if (auto PTy = dyn_cast<PointerType>(&Ty)) {
+ unsigned AddrSpace = PTy->getAddressSpace();
+ return LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
+ }
+
+ if (Ty.isSized()) {
// Aggregates are no different from real scalars as far as GlobalISel is
// concerned.
auto SizeInBits = DL.getTypeSizeInBits(&Ty);
assert(SizeInBits != 0 && "invalid zero-sized type");
return LLT::scalar(SizeInBits);
}
+
return LLT();
}
+
+MVT llvm::getMVTForLLT(LLT Ty) {
+ if (!Ty.isVector())
+ return MVT::getIntegerVT(Ty.getSizeInBits());
+
+ return MVT::getVectorVT(
+ MVT::getIntegerVT(Ty.getElementType().getSizeInBits()),
+ Ty.getNumElements());
+}
+
+LLT llvm::getLLTForMVT(MVT Ty) {
+ if (!Ty.isVector())
+ return LLT::scalar(Ty.getSizeInBits());
+
+ return LLT::vector(Ty.getVectorNumElements(),
+ Ty.getVectorElementType().getSizeInBits());
+}
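
Two notes on this hunk. First, getLLTForType now sizes pointers with DL.getPointerSizeInBits(AddrSpace) rather than DL.getTypeSizeInBits, so the LLT width tracks the pointer's address space. Second, the new getMVTForLLT/getLLTForMVT helpers translate between GlobalISel's LLT and SelectionDAG's MVT for scalars and vectors; pointer LLTs come back as plain integers of the same width, because the conversion goes through MVT::getIntegerVT. Expected round trips, as a sketch:

    assert(getMVTForLLT(LLT::scalar(32)) == MVT::i32);
    assert(getMVTForLLT(LLT::vector(4, 16)) == MVT::v4i16);
    assert(getLLTForMVT(MVT::v4i16) == LLT::vector(4, 16));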
diff --git a/llvm/lib/CodeGen/LowerEmuTLS.cpp b/llvm/lib/CodeGen/LowerEmuTLS.cpp
index ed48365b0102..529d478756d4 100644
--- a/llvm/lib/CodeGen/LowerEmuTLS.cpp
+++ b/llvm/lib/CodeGen/LowerEmuTLS.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index c9bb5461aa3c..5ef907b88315 100644
--- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -48,10 +49,6 @@ static cl::opt<unsigned>
cl::value_desc("N"),
cl::desc("Function number to canonicalize."));
-static cl::opt<unsigned> CanonicalizeBasicBlockNumber(
- "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"),
- cl::desc("BasicBlock number to canonicalize."));
-
namespace {
class MIRCanonicalizer : public MachineFunctionPass {
@@ -373,34 +370,14 @@ static bool doDefKillClear(MachineBasicBlock *MBB) {
}
static bool runOnBasicBlock(MachineBasicBlock *MBB,
- std::vector<StringRef> &bbNames,
- unsigned &basicBlockNum, NamedVRegCursor &NVC) {
-
- if (CanonicalizeBasicBlockNumber != ~0U) {
- if (CanonicalizeBasicBlockNumber != basicBlockNum++)
- return false;
- LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName()
- << "\n";);
- }
-
- if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) {
- LLVM_DEBUG({
- dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
- << "\n";
- });
- return false;
- }
-
+ unsigned BasicBlockNum, VRegRenamer &Renamer) {
LLVM_DEBUG({
dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
dbgs() << "\n\n================================================\n\n";
});
bool Changed = false;
- MachineFunction &MF = *MBB->getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- bbNames.push_back(MBB->getName());
LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
@@ -413,32 +390,10 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
- Changed |= NVC.renameVRegs(MBB);
-
- // Here we renumber the def vregs for the idempotent instructions from the top
- // of the MachineBasicBlock so that they are named in the order that we sorted
- // them alphabetically. Eventually we wont need SkipVRegs because we will use
- // named vregs instead.
- if (IdempotentInstCount)
- NVC.skipVRegs();
-
- auto MII = MBB->begin();
- for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
- MachineInstr &MI = *MII++;
- Changed = true;
- Register vRegToRename = MI.getOperand(0).getReg();
- auto Rename = NVC.createVirtualRegister(vRegToRename);
-
- std::vector<MachineOperand *> RenameMOs;
- for (auto &MO : MRI.reg_operands(vRegToRename)) {
- RenameMOs.push_back(&MO);
- }
-
- for (auto *MO : RenameMOs) {
- MO->setReg(Rename);
- }
- }
+ Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
+ // TODO: Consider dropping this. Dropping kill defs is probably not
+ // semantically sound.
Changed |= doDefKillClear(MBB);
LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
@@ -470,16 +425,12 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
: RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
<< "\n\n================================================\n\n";);
- std::vector<StringRef> BBNames;
-
unsigned BBNum = 0;
-
bool Changed = false;
-
MachineRegisterInfo &MRI = MF.getRegInfo();
- NamedVRegCursor NVC(MRI);
+ VRegRenamer Renamer(MRI);
for (auto MBB : RPOList)
- Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC);
+ Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
return Changed;
}
diff --git a/llvm/lib/CodeGen/MIRNamerPass.cpp b/llvm/lib/CodeGen/MIRNamerPass.cpp
index 9d719f3917ce..9f61dd9ef243 100644
--- a/llvm/lib/CodeGen/MIRNamerPass.cpp
+++ b/llvm/lib/CodeGen/MIRNamerPass.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -54,11 +55,12 @@ public:
if (MF.empty())
return Changed;
- NamedVRegCursor NVC(MF.getRegInfo());
+ VRegRenamer Renamer(MF.getRegInfo());
+ unsigned BBIndex = 0;
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
for (auto &MBB : RPOT)
- Changed |= NVC.renameVRegs(MBB);
+ Changed |= Renamer.renameVRegs(MBB, BBIndex++);
return Changed;
}
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index ad5c617623f2..5976f5da1569 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -204,7 +204,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("nuw" , MIToken::kw_nuw)
.Case("nsw" , MIToken::kw_nsw)
.Case("exact" , MIToken::kw_exact)
- .Case("fpexcept", MIToken::kw_fpexcept)
+ .Case("nofpexcept", MIToken::kw_nofpexcept)
.Case("debug-location", MIToken::kw_debug_location)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
@@ -242,6 +242,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("jump-table", MIToken::kw_jump_table)
.Case("constant-pool", MIToken::kw_constant_pool)
.Case("call-entry", MIToken::kw_call_entry)
+ .Case("custom", MIToken::kw_custom)
.Case("liveout", MIToken::kw_liveout)
.Case("address-taken", MIToken::kw_address_taken)
.Case("landing-pad", MIToken::kw_landing_pad)
@@ -252,6 +253,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("shufflemask", MIToken::kw_shufflemask)
.Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
.Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
+ .Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker)
.Case("unknown-size", MIToken::kw_unknown_size)
.Default(MIToken::Identifier);
}
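
Three MIR-surface changes land in this lexer: the fpexcept keyword flips polarity to nofpexcept, and the new custom and heap-alloc-marker keywords back the parser changes below. The polarity flip means constrained FP instructions are now conservatively assumed to raise FP exceptions, with nofpexcept marking the ones proven not to; in a MIR dump this reads as, for example, "%2:fr64 = nofpexcept ADDSDrr %0, %1" (the opcode and register class are illustrative, not taken from this patch).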
@@ -582,8 +584,8 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
.Default(MIToken::Error);
}
-static Cursor maybeLexExlaim(Cursor C, MIToken &Token,
- ErrorCallbackType ErrorCallback) {
+static Cursor maybeLexExclaim(Cursor C, MIToken &Token,
+ ErrorCallbackType ErrorCallback) {
if (C.peek() != '!')
return None;
auto Range = C;
@@ -719,7 +721,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return R.remaining();
if (Cursor R = maybeLexNumericalLiteral(C, Token))
return R.remaining();
- if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback))
+ if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback))
return R.remaining();
if (Cursor R = maybeLexSymbol(C, Token))
return R.remaining();
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index 200f9d026cc8..aaffe4a4c91b 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -73,7 +73,7 @@ struct MIToken {
kw_nuw,
kw_nsw,
kw_exact,
- kw_fpexcept,
+ kw_nofpexcept,
kw_debug_location,
kw_cfi_same_value,
kw_cfi_offset,
@@ -110,6 +110,7 @@ struct MIToken {
kw_jump_table,
kw_constant_pool,
kw_call_entry,
+ kw_custom,
kw_liveout,
kw_address_taken,
kw_landing_pad,
@@ -120,6 +121,7 @@ struct MIToken {
kw_shufflemask,
kw_pre_instr_symbol,
kw_post_instr_symbol,
+ kw_heap_alloc_marker,
kw_unknown_size,
// Named metadata keywords
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 6498acc9fa51..076ca943788b 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -28,6 +28,7 @@
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -343,6 +344,37 @@ VRegInfo &PerFunctionMIParsingState::getVRegInfoNamed(StringRef RegName) {
return *I.first->second;
}
+static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST,
+ DenseMap<unsigned, const Value *> &Slots2Values) {
+ int Slot = MST.getLocalSlot(V);
+ if (Slot == -1)
+ return;
+ Slots2Values.insert(std::make_pair(unsigned(Slot), V));
+}
+
+/// Creates the mapping from slot numbers to function's unnamed IR values.
+static void initSlots2Values(const Function &F,
+ DenseMap<unsigned, const Value *> &Slots2Values) {
+ ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
+ MST.incorporateFunction(F);
+ for (const auto &Arg : F.args())
+ mapValueToSlot(&Arg, MST, Slots2Values);
+ for (const auto &BB : F) {
+ mapValueToSlot(&BB, MST, Slots2Values);
+ for (const auto &I : BB)
+ mapValueToSlot(&I, MST, Slots2Values);
+ }
+}
+
+const Value* PerFunctionMIParsingState::getIRValue(unsigned Slot) {
+ if (Slots2Values.empty())
+ initSlots2Values(MF.getFunction(), Slots2Values);
+ auto ValueInfo = Slots2Values.find(Slot);
+ if (ValueInfo == Slots2Values.end())
+ return nullptr;
+ return ValueInfo->second;
+}
+
namespace {
/// A wrapper struct around the 'MachineOperand' struct that includes a source
@@ -370,8 +402,6 @@ class MIParser {
PerFunctionMIParsingState &PFS;
/// Maps from slot numbers to function's unnamed basic blocks.
DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks;
- /// Maps from slot numbers to function's unnamed values.
- DenseMap<unsigned, const Value *> Slots2Values;
public:
MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
@@ -455,9 +485,12 @@ public:
bool parseTargetIndexOperand(MachineOperand &Dest);
bool parseCustomRegisterMaskOperand(MachineOperand &Dest);
bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
- bool parseMachineOperand(MachineOperand &Dest,
+ bool parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
+ MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx);
- bool parseMachineOperandAndTargetFlags(MachineOperand &Dest,
+ bool parseMachineOperandAndTargetFlags(const unsigned OpCode,
+ const unsigned OpIdx,
+ MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx);
bool parseOffset(int64_t &Offset);
bool parseAlignment(unsigned &Alignment);
@@ -471,6 +504,10 @@ public:
bool parseOptionalAtomicOrdering(AtomicOrdering &Order);
bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol);
+ bool parseHeapAllocMarker(MDNode *&Node);
+
+ bool parseTargetImmMnemonic(const unsigned OpCode, const unsigned OpIdx,
+ MachineOperand &Dest, const MIRFormatter &MF);
private:
/// Convert the integer literal in the current token into an unsigned integer.
@@ -508,8 +545,6 @@ private:
const BasicBlock *getIRBlock(unsigned Slot);
const BasicBlock *getIRBlock(unsigned Slot, const Function &F);
- const Value *getIRValue(unsigned Slot);
-
/// Get or create an MCSymbol for a given name.
MCSymbol *getOrCreateMCSymbol(StringRef Name);
@@ -550,6 +585,9 @@ bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) {
return true;
}
+typedef function_ref<bool(StringRef::iterator Loc, const Twine &)>
+ ErrorCallbackType;
+
static const char *toString(MIToken::TokenKind TokenKind) {
switch (TokenKind) {
case MIToken::comma:
@@ -906,11 +944,12 @@ bool MIParser::parse(MachineInstr *&MI) {
// Parse the remaining machine operands.
while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) &&
Token.isNot(MIToken::kw_post_instr_symbol) &&
+ Token.isNot(MIToken::kw_heap_alloc_marker) &&
Token.isNot(MIToken::kw_debug_location) &&
Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
auto Loc = Token.location();
Optional<unsigned> TiedDefIdx;
- if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx))
+ if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx))
return true;
if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg())
MO.setIsDebug();
@@ -932,6 +971,10 @@ bool MIParser::parse(MachineInstr *&MI) {
if (Token.is(MIToken::kw_post_instr_symbol))
if (parsePreOrPostInstrSymbol(PostInstrSymbol))
return true;
+ MDNode *HeapAllocMarker = nullptr;
+ if (Token.is(MIToken::kw_heap_alloc_marker))
+ if (parseHeapAllocMarker(HeapAllocMarker))
+ return true;
DebugLoc DebugLocation;
if (Token.is(MIToken::kw_debug_location)) {
@@ -985,6 +1028,8 @@ bool MIParser::parse(MachineInstr *&MI) {
MI->setPreInstrSymbol(MF, PreInstrSymbol);
if (PostInstrSymbol)
MI->setPostInstrSymbol(MF, PostInstrSymbol);
+ if (HeapAllocMarker)
+ MI->setHeapAllocMarker(MF, HeapAllocMarker);
if (!MemOperands.empty())
MI->setMemRefs(MF, MemOperands);
return false;
@@ -1138,7 +1183,7 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_nuw) ||
Token.is(MIToken::kw_nsw) ||
Token.is(MIToken::kw_exact) ||
- Token.is(MIToken::kw_fpexcept)) {
+ Token.is(MIToken::kw_nofpexcept)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
@@ -1164,8 +1209,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::NoSWrap;
if (Token.is(MIToken::kw_exact))
Flags |= MachineInstr::IsExact;
- if (Token.is(MIToken::kw_fpexcept))
- Flags |= MachineInstr::FPExcept;
+ if (Token.is(MIToken::kw_nofpexcept))
+ Flags |= MachineInstr::NoFPExcept;
lex();
}
@@ -1485,17 +1530,61 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
return false;
}
-bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
- const Constant *&C) {
+bool MIParser::parseTargetImmMnemonic(const unsigned OpCode,
+ const unsigned OpIdx,
+ MachineOperand &Dest,
+ const MIRFormatter &MF) {
+ assert(Token.is(MIToken::dot));
+ auto Loc = Token.location(); // record start position
+ size_t Len = 1; // for "."
+ lex();
+
+ // Handle the case where the mnemonic starts with a number.
+ if (Token.is(MIToken::IntegerLiteral)) {
+ Len += Token.range().size();
+ lex();
+ }
+
+ StringRef Src;
+ if (Token.is(MIToken::comma))
+ Src = StringRef(Loc, Len);
+ else {
+ assert(Token.is(MIToken::Identifier));
+ Src = StringRef(Loc, Len + Token.stringValue().size());
+ }
+ int64_t Val;
+ if (MF.parseImmMnemonic(OpCode, OpIdx, Src, Val,
+ [this](StringRef::iterator Loc, const Twine &Msg)
+ -> bool { return error(Loc, Msg); }))
+ return true;
+
+ Dest = MachineOperand::CreateImm(Val);
+ if (!Token.is(MIToken::comma))
+ lex();
+ return false;
+}
+
+static bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
+ PerFunctionMIParsingState &PFS, const Constant *&C,
+ ErrorCallbackType ErrCB) {
auto Source = StringValue.str(); // The source has to be null terminated.
SMDiagnostic Err;
- C = parseConstantValue(Source, Err, *MF.getFunction().getParent(),
+ C = parseConstantValue(Source, Err, *PFS.MF.getFunction().getParent(),
&PFS.IRSlots);
if (!C)
- return error(Loc + Err.getColumnNo(), Err.getMessage());
+ return ErrCB(Loc + Err.getColumnNo(), Err.getMessage());
return false;
}
+bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
+ const Constant *&C) {
+ return ::parseIRConstant(
+ Loc, StringValue, PFS, C,
+ [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ });
+}
+
bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C))
return true;
@@ -1628,27 +1717,52 @@ bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
return false;
}
-bool MIParser::getUnsigned(unsigned &Result) {
+static bool getHexUint(const MIToken &Token, APInt &Result) {
+ assert(Token.is(MIToken::HexLiteral));
+ StringRef S = Token.range();
+ assert(S[0] == '0' && tolower(S[1]) == 'x');
+ // This could be a floating point literal with a special prefix.
+ if (!isxdigit(S[2]))
+ return true;
+ StringRef V = S.substr(2);
+ APInt A(V.size()*4, V, 16);
+
+ // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make
+ // sure it isn't the case before constructing result.
+ unsigned NumBits = (A == 0) ? 32 : A.getActiveBits();
+ Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
+ return false;
+}
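
getHexUint is hoisted to a file-local helper so the new static getUnsigned below can share it. The result width follows the active bits of the value, with one special case for zero; its behavior, as a sketch:

    // "0xff"    -> APInt(8, 0xff)  (width = active bits of the value)
    // "0x0"     -> APInt(32, 0)    (zero has 0 active bits, so use 32)
    // "0xH3C00" -> rejected ('H' is not a hex digit); presumably a special
    //              FP prefix, which the caller handles elsewhere.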
+
+static bool getUnsigned(const MIToken &Token, unsigned &Result,
+ ErrorCallbackType ErrCB) {
if (Token.hasIntegerValue()) {
const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
uint64_t Val64 = Token.integerValue().getLimitedValue(Limit);
if (Val64 == Limit)
- return error("expected 32-bit integer (too large)");
+ return ErrCB(Token.location(), "expected 32-bit integer (too large)");
Result = Val64;
return false;
}
if (Token.is(MIToken::HexLiteral)) {
APInt A;
- if (getHexUint(A))
+ if (getHexUint(Token, A))
return true;
if (A.getBitWidth() > 32)
- return error("expected 32-bit integer (too large)");
+ return ErrCB(Token.location(), "expected 32-bit integer (too large)");
Result = A.getZExtValue();
return false;
}
return true;
}
+bool MIParser::getUnsigned(unsigned &Result) {
+ return ::getUnsigned(
+ Token, Result, [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ });
+}
+
bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
assert(Token.is(MIToken::MachineBasicBlock) ||
Token.is(MIToken::MachineBasicBlockLabel));
@@ -1728,23 +1842,25 @@ bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) {
return false;
}
-bool MIParser::parseGlobalValue(GlobalValue *&GV) {
+static bool parseGlobalValue(const MIToken &Token,
+ PerFunctionMIParsingState &PFS, GlobalValue *&GV,
+ ErrorCallbackType ErrCB) {
switch (Token.kind()) {
case MIToken::NamedGlobalValue: {
- const Module *M = MF.getFunction().getParent();
+ const Module *M = PFS.MF.getFunction().getParent();
GV = M->getNamedValue(Token.stringValue());
if (!GV)
- return error(Twine("use of undefined global value '") + Token.range() +
- "'");
+ return ErrCB(Token.location(), Twine("use of undefined global value '") +
+ Token.range() + "'");
break;
}
case MIToken::GlobalValue: {
unsigned GVIdx;
- if (getUnsigned(GVIdx))
+ if (getUnsigned(Token, GVIdx, ErrCB))
return true;
if (GVIdx >= PFS.IRSlots.GlobalValues.size())
- return error(Twine("use of undefined global value '@") + Twine(GVIdx) +
- "'");
+ return ErrCB(Token.location(), Twine("use of undefined global value '@") +
+ Twine(GVIdx) + "'");
GV = PFS.IRSlots.GlobalValues[GVIdx];
break;
}
@@ -1754,6 +1870,14 @@ bool MIParser::parseGlobalValue(GlobalValue *&GV) {
return false;
}
+bool MIParser::parseGlobalValue(GlobalValue *&GV) {
+ return ::parseGlobalValue(
+ Token, PFS, GV,
+ [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ });
+}
+
bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
GlobalValue *GV = nullptr;
if (parseGlobalValue(GV))
@@ -2295,23 +2419,13 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) {
if (expectAndConsume(MIToken::lparen))
return error("expected syntax shufflemask(<integer or undef>, ...)");
- SmallVector<Constant *, 32> ShufMask;
- LLVMContext &Ctx = MF.getFunction().getContext();
- Type *I32Ty = Type::getInt32Ty(Ctx);
-
- bool AllZero = true;
- bool AllUndef = true;
-
+ SmallVector<int, 32> ShufMask;
do {
if (Token.is(MIToken::kw_undef)) {
- ShufMask.push_back(UndefValue::get(I32Ty));
- AllZero = false;
+ ShufMask.push_back(-1);
} else if (Token.is(MIToken::IntegerLiteral)) {
- AllUndef = false;
const APSInt &Int = Token.integerValue();
- if (!Int.isNullValue())
- AllZero = false;
- ShufMask.push_back(ConstantInt::get(I32Ty, Int.getExtValue()));
+ ShufMask.push_back(Int.getExtValue());
} else
return error("expected integer constant");
@@ -2321,13 +2435,8 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) {
if (expectAndConsume(MIToken::rparen))
return error("shufflemask should be terminated by ')'.");
- if (AllZero || AllUndef) {
- VectorType *VT = VectorType::get(I32Ty, ShufMask.size());
- Constant *C = AllZero ? Constant::getNullValue(VT) : UndefValue::get(VT);
- Dest = MachineOperand::CreateShuffleMask(C);
- } else
- Dest = MachineOperand::CreateShuffleMask(ConstantVector::get(ShufMask));
-
+ ArrayRef<int> MaskAlloc = MF.allocateShuffleMask(ShufMask);
+ Dest = MachineOperand::CreateShuffleMask(MaskAlloc);
return false;
}
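
Shuffle masks no longer round-trip through IR ConstantVectors: the operand now holds a MachineFunction-allocated array of ints with -1 standing in for undef, which is why the AllZero/AllUndef special cases disappear. A sketch of the mapping, assuming a MachineFunction &MF is in scope:

    // MIR "shufflemask(0, 2, undef, 3)" parses to the array {0, 2, -1, 3}.
    SmallVector<int, 32> Mask = {0, 2, -1, 3};
    ArrayRef<int> Alloc = MF.allocateShuffleMask(Mask);
    MachineOperand Op = MachineOperand::CreateShuffleMask(Alloc);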
@@ -2402,7 +2511,8 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
return false;
}
-bool MIParser::parseMachineOperand(MachineOperand &Dest,
+bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
+ MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx) {
switch (Token.kind()) {
case MIToken::kw_implicit:
@@ -2491,6 +2601,13 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
return parseCustomRegisterMaskOperand(Dest);
} else
return parseTypedImmediateOperand(Dest);
+ case MIToken::dot: {
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ if (const auto *Formatter = TII->getMIRFormatter()) {
+ return parseTargetImmMnemonic(OpCode, OpIdx, Dest, *Formatter);
+ }
+ LLVM_FALLTHROUGH;
+ }
default:
// FIXME: Parse the MCSymbol machine operand.
return error("expected a machine operand");
@@ -2499,7 +2616,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
}
bool MIParser::parseMachineOperandAndTargetFlags(
- MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) {
+ const unsigned OpCode, const unsigned OpIdx, MachineOperand &Dest,
+ Optional<unsigned> &TiedDefIdx) {
unsigned TF = 0;
bool HasTargetFlags = false;
if (Token.is(MIToken::kw_target_flags)) {
@@ -2531,7 +2649,7 @@ bool MIParser::parseMachineOperandAndTargetFlags(
return true;
}
auto Loc = Token.location();
- if (parseMachineOperand(Dest, TiedDefIdx))
+ if (parseMachineOperand(OpCode, OpIdx, Dest, TiedDefIdx))
return true;
if (!HasTargetFlags)
return false;
@@ -2592,30 +2710,31 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) {
return false;
}
-bool MIParser::parseIRValue(const Value *&V) {
+static bool parseIRValue(const MIToken &Token, PerFunctionMIParsingState &PFS,
+ const Value *&V, ErrorCallbackType ErrCB) {
switch (Token.kind()) {
case MIToken::NamedIRValue: {
- V = MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue());
+ V = PFS.MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue());
break;
}
case MIToken::IRValue: {
unsigned SlotNumber = 0;
- if (getUnsigned(SlotNumber))
+ if (getUnsigned(Token, SlotNumber, ErrCB))
return true;
- V = getIRValue(SlotNumber);
+ V = PFS.getIRValue(SlotNumber);
break;
}
case MIToken::NamedGlobalValue:
case MIToken::GlobalValue: {
GlobalValue *GV = nullptr;
- if (parseGlobalValue(GV))
+ if (parseGlobalValue(Token, PFS, GV, ErrCB))
return true;
V = GV;
break;
}
case MIToken::QuotedIRValue: {
const Constant *C = nullptr;
- if (parseIRConstant(Token.location(), Token.stringValue(), C))
+ if (parseIRConstant(Token.location(), Token.stringValue(), PFS, C, ErrCB))
return true;
V = C;
break;
@@ -2624,10 +2743,17 @@ bool MIParser::parseIRValue(const Value *&V) {
llvm_unreachable("The current token should be an IR block reference");
}
if (!V)
- return error(Twine("use of undefined IR value '") + Token.range() + "'");
+ return ErrCB(Token.location(), Twine("use of undefined IR value '") + Token.range() + "'");
return false;
}
+bool MIParser::parseIRValue(const Value *&V) {
+ return ::parseIRValue(
+ Token, PFS, V, [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ });
+}
+
bool MIParser::getUint64(uint64_t &Result) {
if (Token.hasIntegerValue()) {
if (Token.integerValue().getActiveBits() > 64)
@@ -2648,20 +2774,7 @@ bool MIParser::getUint64(uint64_t &Result) {
}
bool MIParser::getHexUint(APInt &Result) {
- assert(Token.is(MIToken::HexLiteral));
- StringRef S = Token.range();
- assert(S[0] == '0' && tolower(S[1]) == 'x');
- // This could be a floating point literal with a special prefix.
- if (!isxdigit(S[2]))
- return true;
- StringRef V = S.substr(2);
- APInt A(V.size()*4, V, 16);
-
- // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make
- // sure it isn't the case before constructing result.
- unsigned NumBits = (A == 0) ? 32 : A.getActiveBits();
- Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
- return false;
+ return ::getHexUint(Token, Result);
}
bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
@@ -2748,6 +2861,20 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) {
"expected a global value or an external symbol after 'call-entry'");
}
break;
+ case MIToken::kw_custom: {
+ lex();
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ if (const auto *Formatter = TII->getMIRFormatter()) {
+ if (Formatter->parseCustomPseudoSourceValue(
+ Token.stringValue(), MF, PFS, PSV,
+ [this](StringRef::iterator Loc, const Twine &Msg) -> bool {
+ return error(Loc, Msg);
+ }))
+ return true;
+ } else
+ return error("unable to parse target custom pseudo source value");
+ break;
+ }
default:
llvm_unreachable("The current token should be pseudo source value");
}
@@ -2759,7 +2886,7 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) ||
Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) ||
Token.is(MIToken::FixedStackObject) || Token.is(MIToken::StackObject) ||
- Token.is(MIToken::kw_call_entry)) {
+ Token.is(MIToken::kw_call_entry) || Token.is(MIToken::kw_custom)) {
const PseudoSourceValue *PSV = nullptr;
if (parseMemoryPseudoSourceValue(PSV))
return true;
@@ -2956,6 +3083,22 @@ bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) {
return false;
}
+bool MIParser::parseHeapAllocMarker(MDNode *&Node) {
+ assert(Token.is(MIToken::kw_heap_alloc_marker) &&
+ "Invalid token for a heap alloc marker!");
+ lex();
+ parseMDNode(Node);
+ if (!Node)
+ return error("expected a MDNode after 'heap-alloc-marker'");
+ if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+ Token.is(MIToken::lbrace))
+ return false;
+ if (Token.isNot(MIToken::comma))
+ return error("expected ',' before the next machine operand");
+ lex();
+ return false;
+}
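
Together with the MIRPrinter hunk further down, this lets call instructions carry their heap-allocation-site metadata through a MIR round trip. A serialized call would look something like "CALL64pcrel32 @f, <regmask...>, heap-alloc-marker !5", where the opcode is illustrative and !5 names an MDNode from the embedded IR module.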
+
static void initSlots2BasicBlocks(
const Function &F,
DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
@@ -2994,37 +3137,6 @@ const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) {
return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks);
}
-static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST,
- DenseMap<unsigned, const Value *> &Slots2Values) {
- int Slot = MST.getLocalSlot(V);
- if (Slot == -1)
- return;
- Slots2Values.insert(std::make_pair(unsigned(Slot), V));
-}
-
-/// Creates the mapping from slot numbers to function's unnamed IR values.
-static void initSlots2Values(const Function &F,
- DenseMap<unsigned, const Value *> &Slots2Values) {
- ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
- MST.incorporateFunction(F);
- for (const auto &Arg : F.args())
- mapValueToSlot(&Arg, MST, Slots2Values);
- for (const auto &BB : F) {
- mapValueToSlot(&BB, MST, Slots2Values);
- for (const auto &I : BB)
- mapValueToSlot(&I, MST, Slots2Values);
- }
-}
-
-const Value *MIParser::getIRValue(unsigned Slot) {
- if (Slots2Values.empty())
- initSlots2Values(MF.getFunction(), Slots2Values);
- auto ValueInfo = Slots2Values.find(Slot);
- if (ValueInfo == Slots2Values.end())
- return nullptr;
- return ValueInfo->second;
-}
-
MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) {
// FIXME: Currently we can't recognize temporary or local symbols and call all
// of the appropriate forms to create them. However, this handles basic cases
@@ -3087,3 +3199,15 @@ bool llvm::parseMDNode(PerFunctionMIParsingState &PFS,
MDNode *&Node, StringRef Src, SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node);
}
+
+bool MIRFormatter::parseIRValue(StringRef Src, MachineFunction &MF,
+ PerFunctionMIParsingState &PFS, const Value *&V,
+ ErrorCallbackType ErrorCallback) {
+ MIToken Token;
+ Src = lexMIToken(Src, Token, [&](StringRef::iterator Loc, const Twine &Msg) {
+ ErrorCallback(Loc, Msg);
+ });
+ V = nullptr;
+
+ return ::parseIRValue(Token, PFS, V, ErrorCallback);
+}
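
This is the reusable half of the new target-formatter plumbing: a target's MIRFormatter can call back into the generic IR-value parser while decoding its custom pseudo source values (the kw_custom case above). A hypothetical override, with the class name and the PSV-wrapping helper assumed rather than taken from this patch:

    struct MyTargetMIRFormatter : public MIRFormatter {
      bool parseCustomPseudoSourceValue(
          StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS,
          const PseudoSourceValue *&PSV,
          ErrorCallbackType ErrorCallback) const override {
        const Value *V = nullptr;
        if (parseIRValue(Src, MF, PFS, V, ErrorCallback))
          return true;
        PSV = wrapInTargetPSV(MF, V); // assumed target-specific helper
        return false;
      }
    };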
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 55fac93d8991..10157c746b46 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -64,9 +64,12 @@ class MIRParserImpl {
/// parts.
bool NoMIRDocuments = false;
+ std::function<void(Function &)> ProcessIRFunction;
+
public:
- MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
- StringRef Filename, LLVMContext &Context);
+ MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
+ LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction);
void reportDiagnostic(const SMDiagnostic &Diag);
@@ -92,6 +95,9 @@ public:
/// Return null if an error occurred.
std::unique_ptr<Module> parseIRModule();
+ /// Create an empty function with the given name.
+ Function *createDummyFunction(StringRef Name, Module &M);
+
bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI);
/// Parse the machine function in the current YAML document.
@@ -163,13 +169,13 @@ static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) {
}
MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents,
- StringRef Filename, LLVMContext &Context)
+ StringRef Filename, LLVMContext &Context,
+ std::function<void(Function &)> Callback)
: SM(),
- In(SM.getMemoryBuffer(
- SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))->getBuffer(),
- nullptr, handleYAMLDiag, this),
- Filename(Filename),
- Context(Context) {
+ In(SM.getMemoryBuffer(SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))
+ ->getBuffer(),
+ nullptr, handleYAMLDiag, this),
+ Filename(Filename), Context(Context), ProcessIRFunction(Callback) {
In.setContext(&In);
}
@@ -256,14 +262,17 @@ bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
return false;
}
-/// Create an empty function with the given name.
-static Function *createDummyFunction(StringRef Name, Module &M) {
+Function *MIRParserImpl::createDummyFunction(StringRef Name, Module &M) {
auto &Context = M.getContext();
Function *F =
Function::Create(FunctionType::get(Type::getVoidTy(Context), false),
Function::ExternalLinkage, Name, M);
BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
new UnreachableInst(Context, BB);
+
+ if (ProcessIRFunction)
+ ProcessIRFunction(*F);
+
return F;
}
@@ -925,21 +934,23 @@ bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
return Impl->parseMachineFunctions(M, MMI);
}
-std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(StringRef Filename,
- SMDiagnostic &Error,
- LLVMContext &Context) {
+std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(
+ StringRef Filename, SMDiagnostic &Error, LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction) {
auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
if (std::error_code EC = FileOrErr.getError()) {
Error = SMDiagnostic(Filename, SourceMgr::DK_Error,
"Could not open input file: " + EC.message());
return nullptr;
}
- return createMIRParser(std::move(FileOrErr.get()), Context);
+ return createMIRParser(std::move(FileOrErr.get()), Context,
+ ProcessIRFunction);
}
std::unique_ptr<MIRParser>
llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents,
- LLVMContext &Context) {
+ LLVMContext &Context,
+ std::function<void(Function &)> ProcessIRFunction) {
auto Filename = Contents->getBufferIdentifier();
if (Context.shouldDiscardValueNames()) {
Context.diagnose(DiagnosticInfoMIRParser(
@@ -949,6 +960,6 @@ llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents,
"Can't read MIR with a Context that discards named Values")));
return nullptr;
}
- return std::make_unique<MIRParser>(
- std::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context));
+ return std::make_unique<MIRParser>(std::make_unique<MIRParserImpl>(
+ std::move(Contents), Filename, Context, ProcessIRFunction));
}
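
The new optional ProcessIRFunction hook runs on every dummy IR function the parser has to fabricate (see createDummyFunction above), letting a tool adjust such functions before their machine function bodies are parsed. A minimal sketch of a caller; the file name and attribute are placeholders:

    SMDiagnostic Err;
    LLVMContext Ctx;
    std::unique_ptr<MIRParser> Parser = createMIRParserFromFile(
        "input.mir", Err, Ctx,
        [](Function &F) { F.addFnAttr("target-cpu", "generic"); });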
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index 1a4e21ac06a9..e8cd3d60ccb1 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -752,8 +752,8 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "nsw ";
if (MI.getFlag(MachineInstr::IsExact))
OS << "exact ";
- if (MI.getFlag(MachineInstr::FPExcept))
- OS << "fpexcept ";
+ if (MI.getFlag(MachineInstr::NoFPExcept))
+ OS << "nofpexcept ";
OS << TII->getName(MI.getOpcode());
if (I < E)
@@ -784,6 +784,13 @@ void MIPrinter::print(const MachineInstr &MI) {
MachineOperand::printSymbol(OS, *PostInstrSymbol);
NeedComma = true;
}
+ if (MDNode *HeapAllocMarker = MI.getHeapAllocMarker()) {
+ if (NeedComma)
+ OS << ',';
+ OS << " heap-alloc-marker ";
+ HeapAllocMarker->printAsOperand(OS, MST);
+ NeedComma = true;
+ }
if (const DebugLoc &DL = MI.getDebugLoc()) {
if (NeedComma)
@@ -849,7 +856,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())
TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);
const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo();
- Op.print(OS, MST, TypeToPrint, PrintDef, /*IsStandalone=*/false,
+ Op.print(OS, MST, TypeToPrint, OpIdx, PrintDef, /*IsStandalone=*/false,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII);
break;
}
@@ -867,6 +874,28 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
}
}
+void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V,
+ ModuleSlotTracker &MST) {
+ if (isa<GlobalValue>(V)) {
+ V.printAsOperand(OS, /*PrintType=*/false, MST);
+ return;
+ }
+ if (isa<Constant>(V)) {
+ // Machine memory operands can load/store to/from constant value pointers.
+ OS << '`';
+ V.printAsOperand(OS, /*PrintType=*/true, MST);
+ OS << '`';
+ return;
+ }
+ OS << "%ir.";
+ if (V.hasName()) {
+ printLLVMNameWithoutPrefix(OS, V.getName());
+ return;
+ }
+ int Slot = MST.getCurrentFunction() ? MST.getLocalSlot(&V) : -1;
+ MachineOperand::printIRSlotNumber(OS, Slot);
+}
+
void llvm::printMIR(raw_ostream &OS, const Module &M) {
yaml::Output Out(OS);
Out << const_cast<Module &>(M);
diff --git a/llvm/lib/CodeGen/MIRPrintingPass.cpp b/llvm/lib/CodeGen/MIRPrintingPass.cpp
index e032fffd658c..1b5a9ade0871 100644
--- a/llvm/lib/CodeGen/MIRPrintingPass.cpp
+++ b/llvm/lib/CodeGen/MIRPrintingPass.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MIRPrinter.h"
-
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index 6629000f468f..fcc40b26c527 100644
--- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -13,336 +13,144 @@ using namespace llvm;
#define DEBUG_TYPE "mir-vregnamer-utils"
-namespace {
+using VRegRenameMap = std::map<unsigned, unsigned>;
-// TypedVReg and VRType are used to tell the renamer what to do at points in a
-// sequence of values to be renamed. A TypedVReg can either contain
-// an actual VReg, a FrameIndex, or it could just be a barrier for the next
-// candidate (side-effecting instruction). This tells the renamer to increment
-// to the next vreg name, or to skip modulo some skip-gap value.
-enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate };
-class TypedVReg {
- VRType Type;
- Register Reg;
-
-public:
- TypedVReg(Register Reg) : Type(RSE_Reg), Reg(Reg) {}
- TypedVReg(VRType Type) : Type(Type), Reg(~0U) {
- assert(Type != RSE_Reg && "Expected a non-Register Type.");
- }
-
- bool isReg() const { return Type == RSE_Reg; }
- bool isFrameIndex() const { return Type == RSE_FrameIndex; }
- bool isCandidate() const { return Type == RSE_NewCandidate; }
-
- VRType getType() const { return Type; }
- Register getReg() const {
- assert(this->isReg() && "Expected a virtual or physical Register.");
- return Reg;
- }
-};
-
-/// Here we find our candidates. What makes an interesting candidate?
-/// A candidate for a canonicalization tree root is normally any kind of
-/// instruction that causes side effects such as a store to memory or a copy to
-/// a physical register or a return instruction. We use these as an expression
-/// tree root that we walk in order to build a canonical walk which should
-/// result in canonical vreg renaming.
-std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {
- std::vector<MachineInstr *> Candidates;
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-
- for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
- MachineInstr *MI = &*II;
-
- bool DoesMISideEffect = false;
-
- if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) {
- const Register Dst = MI->getOperand(0).getReg();
- DoesMISideEffect |= !Register::isVirtualRegister(Dst);
-
- for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
- if (DoesMISideEffect)
- break;
- DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent());
- }
- }
-
- if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect)
- continue;
+bool VRegRenamer::doVRegRenaming(const VRegRenameMap &VRM) {
+ bool Changed = false;
- LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump(););
- Candidates.push_back(MI);
+ for (const auto &E : VRM) {
+ Changed = Changed || !MRI.reg_empty(E.first);
+ MRI.replaceRegWith(E.first, E.second);
}
- return Candidates;
-}
-
-void doCandidateWalk(std::vector<TypedVReg> &VRegs,
- std::queue<TypedVReg> &RegQueue,
- std::vector<MachineInstr *> &VisitedMIs,
- const MachineBasicBlock *MBB) {
-
- const MachineFunction &MF = *MBB->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
- while (!RegQueue.empty()) {
-
- auto TReg = RegQueue.front();
- RegQueue.pop();
-
- if (TReg.isFrameIndex()) {
- LLVM_DEBUG(dbgs() << "Popping frame index.\n";);
- VRegs.push_back(TypedVReg(RSE_FrameIndex));
- continue;
- }
-
- assert(TReg.isReg() && "Expected vreg or physreg.");
- Register Reg = TReg.getReg();
-
- if (Register::isVirtualRegister(Reg)) {
- LLVM_DEBUG({
- dbgs() << "Popping vreg ";
- MRI.def_begin(Reg)->dump();
- dbgs() << "\n";
- });
-
- if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) {
- return TR.isReg() && TR.getReg() == Reg;
- })) {
- VRegs.push_back(TypedVReg(Reg));
- }
- } else {
- LLVM_DEBUG(dbgs() << "Popping physreg.\n";);
- VRegs.push_back(TypedVReg(Reg));
- continue;
- }
-
- for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) {
- MachineInstr *Def = RI->getParent();
-
- if (Def->getParent() != MBB)
- continue;
-
- if (llvm::any_of(VisitedMIs,
- [&](const MachineInstr *VMI) { return Def == VMI; })) {
- break;
- }
-
- LLVM_DEBUG({
- dbgs() << "\n========================\n";
- dbgs() << "Visited MI: ";
- Def->dump();
- dbgs() << "BB Name: " << Def->getParent()->getName() << "\n";
- dbgs() << "\n========================\n";
- });
- VisitedMIs.push_back(Def);
- for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
-
- MachineOperand &MO = Def->getOperand(I);
- if (MO.isFI()) {
- LLVM_DEBUG(dbgs() << "Pushing frame index.\n";);
- RegQueue.push(TypedVReg(RSE_FrameIndex));
- }
-
- if (!MO.isReg())
- continue;
- RegQueue.push(TypedVReg(MO.getReg()));
- }
- }
- }
+ return Changed;
}
-std::map<unsigned, unsigned>
-getVRegRenameMap(const std::vector<TypedVReg> &VRegs,
- const std::vector<Register> &renamedInOtherBB,
- MachineRegisterInfo &MRI, NamedVRegCursor &NVC) {
- std::map<unsigned, unsigned> VRegRenameMap;
- bool FirstCandidate = true;
-
- for (auto &vreg : VRegs) {
- if (vreg.isFrameIndex()) {
- // We skip one vreg for any frame index because there is a good chance
- // (especially when comparing SelectionDAG to GlobalISel generated MIR)
- // that in the other file we are just getting an incoming vreg that comes
- // from a copy from a frame index. So it's safe to skip by one.
- unsigned LastRenameReg = NVC.incrementVirtualVReg();
- (void)LastRenameReg;
- LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
- continue;
- } else if (vreg.isCandidate()) {
-
- // After the first candidate, for every subsequent candidate, we skip mod
- // 10 registers so that the candidates are more likely to start at the
- // same vreg number making it more likely that the canonical walk from the
- // candidate insruction. We don't need to skip from the first candidate of
- // the BasicBlock because we already skip ahead several vregs for each BB.
- unsigned LastRenameReg = NVC.getVirtualVReg();
- if (FirstCandidate)
- NVC.incrementVirtualVReg(LastRenameReg % 10);
- FirstCandidate = false;
- continue;
- } else if (!Register::isVirtualRegister(vreg.getReg())) {
- unsigned LastRenameReg = NVC.incrementVirtualVReg();
- (void)LastRenameReg;
- LLVM_DEBUG({
- dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n";
- });
- continue;
- }
-
- auto Reg = vreg.getReg();
- if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) {
- LLVM_DEBUG(dbgs() << "Vreg " << Reg
- << " already renamed in other BB.\n";);
- continue;
- }
+VRegRenameMap
+VRegRenamer::getVRegRenameMap(const std::vector<NamedVReg> &VRegs) {
- auto Rename = NVC.createVirtualRegister(Reg);
+ StringMap<unsigned> VRegNameCollisionMap;
- if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
- LLVM_DEBUG(dbgs() << "Mapping vreg ";);
- if (MRI.reg_begin(Reg) != MRI.reg_end()) {
- LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););
- } else {
- LLVM_DEBUG(dbgs() << Reg;);
- }
- LLVM_DEBUG(dbgs() << " to ";);
- if (MRI.reg_begin(Rename) != MRI.reg_end()) {
- LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););
- } else {
- LLVM_DEBUG(dbgs() << Rename;);
- }
- LLVM_DEBUG(dbgs() << "\n";);
+ auto GetUniqueVRegName = [&VRegNameCollisionMap](const NamedVReg &Reg) {
+ if (VRegNameCollisionMap.find(Reg.getName()) == VRegNameCollisionMap.end())
+ VRegNameCollisionMap[Reg.getName()] = 0;
+ const unsigned Counter = ++VRegNameCollisionMap[Reg.getName()];
+ return Reg.getName() + "__" + std::to_string(Counter);
+ };
- VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));
- }
+ VRegRenameMap VRM;
+ for (const auto &VReg : VRegs) {
+ const unsigned Reg = VReg.getReg();
+ VRM[Reg] = createVirtualRegisterWithLowerName(Reg, GetUniqueVRegName(VReg));
}
-
- return VRegRenameMap;
+ return VRM;
}
-bool doVRegRenaming(std::vector<Register> &renamedInOtherBB,
- const std::map<unsigned, unsigned> &VRegRenameMap,
- MachineRegisterInfo &MRI) {
- bool Changed = false;
- for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) {
-
- auto VReg = I->first;
- auto Rename = I->second;
-
- renamedInOtherBB.push_back(Rename);
-
- std::vector<MachineOperand *> RenameMOs;
- for (auto &MO : MRI.reg_operands(VReg)) {
- RenameMOs.push_back(&MO);
- }
-
- for (auto *MO : RenameMOs) {
- Changed = true;
- MO->setReg(Rename);
+std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) {
+ std::string S;
+ raw_string_ostream OS(S);
- if (!MO->isDef())
- MO->setIsKill(false);
+ // Gets a hashable artifact from a given MachineOperand (i.e., an unsigned).
+ auto GetHashableMO = [this](const MachineOperand &MO) -> unsigned {
+ switch (MO.getType()) {
+ case MachineOperand::MO_CImmediate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(),
+ MO.getCImm()->getZExtValue());
+ case MachineOperand::MO_FPImmediate:
+ return hash_combine(
+ MO.getType(), MO.getTargetFlags(),
+ MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
+ case MachineOperand::MO_Register:
+ if (Register::isVirtualRegister(MO.getReg()))
+ return MRI.getVRegDef(MO.getReg())->getOpcode();
+ return MO.getReg();
+ case MachineOperand::MO_Immediate:
+ return MO.getImm();
+ case MachineOperand::MO_TargetIndex:
+ return MO.getOffset() | (MO.getTargetFlags() << 16);
+ case MachineOperand::MO_FrameIndex:
+ return llvm::hash_value(MO);
+
+ // We could explicitly handle all the types of the MachineOperand here,
+ // but we can just return a common number until we find a compelling
+ // test case where this is bad. The only side effect here is contributing
+ // to a hash collision, but there's enough information (opcodes, other
+ // registers, etc.) that this will likely not be a problem.
+
+ // TODO: Handle the following Index/ID/Predicate cases. They can
+ // be hashed on in a stable manner.
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_CFIIndex:
+ case MachineOperand::MO_IntrinsicID:
+ case MachineOperand::MO_Predicate:
+
+ // In the cases below we haven't found a way to produce an artifact that will
+ // result in a stable hash, in most cases because they are pointers. We want
+ // stable hashes because we want the hash to be the same run to run.
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut:
+ case MachineOperand::MO_Metadata:
+ case MachineOperand::MO_MCSymbol:
+ case MachineOperand::MO_ShuffleMask:
+ return 0;
}
+ llvm_unreachable("Unexpected MachineOperandType.");
+ };
+
+ SmallVector<unsigned, 16> MIOperands = {MI.getOpcode(), MI.getFlags()};
+ llvm::transform(MI.uses(), std::back_inserter(MIOperands), GetHashableMO);
+
+ for (const auto *Op : MI.memoperands()) {
+ MIOperands.push_back((unsigned)Op->getSize());
+ MIOperands.push_back((unsigned)Op->getFlags());
+ MIOperands.push_back((unsigned)Op->getOffset());
+ MIOperands.push_back((unsigned)Op->getOrdering());
+ MIOperands.push_back((unsigned)Op->getAddrSpace());
+ MIOperands.push_back((unsigned)Op->getSyncScopeID());
+ MIOperands.push_back((unsigned)Op->getBaseAlignment());
+ MIOperands.push_back((unsigned)Op->getFailureOrdering());
}
- return Changed;
+ auto HashMI = hash_combine_range(MIOperands.begin(), MIOperands.end());
+ return std::to_string(HashMI).substr(0, 5);
}
-bool renameVRegs(MachineBasicBlock *MBB,
- std::vector<Register> &renamedInOtherBB,
- NamedVRegCursor &NVC) {
- bool Changed = false;
- MachineFunction &MF = *MBB->getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
- std::vector<MachineInstr *> VisitedMIs;
- llvm::copy(Candidates, std::back_inserter(VisitedMIs));
-
- std::vector<TypedVReg> VRegs;
- for (auto candidate : Candidates) {
- VRegs.push_back(TypedVReg(RSE_NewCandidate));
-
- std::queue<TypedVReg> RegQueue;
-
- // Here we walk the vreg operands of a non-root node along our walk.
- // The root nodes are the original candidates (stores normally).
- // These are normally not the root nodes (except for the case of copies to
- // physical registers).
- for (unsigned i = 1; i < candidate->getNumOperands(); i++) {
- if (candidate->mayStore() || candidate->isBranch())
- break;
-
- MachineOperand &MO = candidate->getOperand(i);
- if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
- continue;
-
- LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
- RegQueue.push(TypedVReg(MO.getReg()));
- }
-
- // Here we walk the root candidates. We start from the 0th operand because
- // the root is normally a store to a vreg.
- for (unsigned i = 0; i < candidate->getNumOperands(); i++) {
-
- if (!candidate->mayStore() && !candidate->isBranch())
- break;
-
- MachineOperand &MO = candidate->getOperand(i);
-
- // TODO: Do we want to only add vregs here?
- if (!MO.isReg() && !MO.isFI())
- continue;
-
- LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
-
- RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg())
- : TypedVReg(RSE_FrameIndex));
- }
-
- doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB);
- }
-
- // If we have populated no vregs to rename then bail.
- // The rest of this function does the vreg remaping.
- if (VRegs.size() == 0)
- return Changed;
-
- auto VRegRenameMap = getVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC);
- Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
- return Changed;
+unsigned VRegRenamer::createVirtualRegister(unsigned VReg) {
+ assert(Register::isVirtualRegister(VReg) && "Expected Virtual Registers");
+ std::string Name = getInstructionOpcodeHash(*MRI.getVRegDef(VReg));
+ return createVirtualRegisterWithLowerName(VReg, Name);
}
-} // anonymous namespace
-void NamedVRegCursor::skipVRegs() {
- unsigned VRegGapIndex = 1;
- if (!virtualVRegNumber) {
- VRegGapIndex = 0;
- virtualVRegNumber = MRI.createIncompleteVirtualRegister();
+bool VRegRenamer::renameInstsInMBB(MachineBasicBlock *MBB) {
+ std::vector<NamedVReg> VRegs;
+ std::string Prefix = "bb" + std::to_string(CurrentBBNumber) + "_";
+ for (MachineInstr &Candidate : *MBB) {
+ // Don't rename stores/branches.
+ if (Candidate.mayStore() || Candidate.isBranch())
+ continue;
+ if (!Candidate.getNumOperands())
+ continue;
+ // Look for instructions that define VRegs in operand 0.
+ MachineOperand &MO = Candidate.getOperand(0);
+ // Skip non-register operands and instructions defining physical registers.
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+ continue;
+ VRegs.push_back(
+ NamedVReg(MO.getReg(), Prefix + getInstructionOpcodeHash(Candidate)));
}
- const unsigned VR_GAP = (++VRegGapIndex * SkipGapSize);
- unsigned I = virtualVRegNumber;
- const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
-
- virtualVRegNumber = E;
-}
-
-unsigned NamedVRegCursor::createVirtualRegister(unsigned VReg) {
- if (!virtualVRegNumber)
- skipVRegs();
- std::string S;
- raw_string_ostream OS(S);
- OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
- OS.flush();
- virtualVRegNumber++;
- if (auto RC = MRI.getRegClassOrNull(VReg))
- return MRI.createVirtualRegister(RC, OS.str());
- return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str());
+ return VRegs.size() ? doVRegRenaming(getVRegRenameMap(VRegs)) : false;
}
-bool NamedVRegCursor::renameVRegs(MachineBasicBlock *MBB) {
- return ::renameVRegs(MBB, RenamedInOtherBB, *this);
+unsigned VRegRenamer::createVirtualRegisterWithLowerName(unsigned VReg,
+ StringRef Name) {
+ std::string LowerName = Name.lower();
+ const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg);
+ return RC ? MRI.createVirtualRegister(RC, LowerName)
+ : MRI.createGenericVirtualRegister(MRI.getType(VReg), LowerName);
}
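
For illustration (the hash digits here are hypothetical): with CurrentBBNumber set to 0, a renamed def ends up with a lowercase name of the documented bb<BBNum>_hash_[0-9]+ shape, for example %bb0_hash_1; two candidates that hash identically are disambiguated by the trailing collision counter.
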
diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.h b/llvm/lib/CodeGen/MIRVRegNamerUtils.h
index c5b52a968538..0c0a71a13248 100644
--- a/llvm/lib/CodeGen/MIRVRegNamerUtils.h
+++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.h
@@ -25,65 +25,67 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/raw_ostream.h"
-#include <queue>
-
namespace llvm {
+/// VRegRenamer - This class is used for renaming vregs in a machine basic
+/// block according to the semantics of each defining instruction.
+class VRegRenamer {
+ class NamedVReg {
+ Register Reg;
+ std::string Name;
+
+ public:
+ NamedVReg(Register Reg, std::string Name = "") : Reg(Reg), Name(Name) {}
+ NamedVReg(std::string Name = "") : Reg(~0U), Name(Name) {}
+
+ const std::string &getName() const { return Name; }
+
+ Register getReg() const { return Reg; }
+ };
-/// NamedVRegCursor - The cursor is an object that keeps track of what the next
-/// vreg name should be. It does book keeping to determine when to skip the
-/// index value and by how much, or if the next vreg name should be an increment
-/// from the previous.
-class NamedVRegCursor {
MachineRegisterInfo &MRI;
- /// virtualVRegNumber - Book keeping of the last vreg position.
- unsigned virtualVRegNumber;
+ unsigned CurrentBBNumber = 0;
- /// SkipGapSize - Used to calculate a modulo amount to skip by after every
- /// sequence of instructions starting from a given side-effecting
- /// MachineInstruction for a given MachineBasicBlock. The general idea is that
- /// for a given program compiled with two different opt pipelines, there
- /// shouldn't be greater than SkipGapSize difference in how many vregs are in
- /// play between the two and for every def-use graph of vregs we rename we
- /// will round up to the next SkipGapSize'th number so that we have a high
- /// change of landing on the same name for two given matching side-effects
- /// for the two compilation outcomes.
- const unsigned SkipGapSize;
+  /// Given an instruction, construct a hash of its opcode and operands.
+  /// When dealing with virtual registers, just hash the opcode of
+  /// the instruction defining that vreg.
+  /// Handle immediates and registers (physical and virtual) explicitly,
+  /// and return a common value for the other cases.
+  /// Instructions will be named using the scheme
+  /// bb<block_no>_hash_<collision_count>.
+ std::string getInstructionOpcodeHash(MachineInstr &MI);
- /// RenamedInOtherBB - VRegs that we already renamed: ie breadcrumbs.
- std::vector<Register> RenamedInOtherBB;
+ /// For all the VRegs that are candidates for renaming,
+ /// return a mapping from old vregs to new vregs with names.
+ std::map<unsigned, unsigned>
+ getVRegRenameMap(const std::vector<NamedVReg> &VRegs);
-public:
- NamedVRegCursor() = delete;
- /// 1000 for the SkipGapSize was a good heuristic at the time of the writing
- /// of the MIRCanonicalizerPass. Adjust as needed.
- NamedVRegCursor(MachineRegisterInfo &MRI, unsigned SkipGapSize = 1000)
- : MRI(MRI), virtualVRegNumber(0), SkipGapSize(SkipGapSize) {}
-
- /// SkipGapSize - Skips modulo a gap value of indices. Indices are used to
- /// produce the next vreg name.
- void skipVRegs();
-
- unsigned getVirtualVReg() const { return virtualVRegNumber; }
-
- /// incrementVirtualVReg - This increments an index value that us used to
- /// create a new vreg name. This is not a Register.
- unsigned incrementVirtualVReg(unsigned incr = 1) {
- virtualVRegNumber += incr;
- return virtualVRegNumber;
- }
+ /// Perform replacing of registers based on the <old,new> vreg map.
+ bool doVRegRenaming(const std::map<unsigned, unsigned> &VRegRenameMap);
/// createVirtualRegister - Given an existing vreg, create a named vreg to
- /// take its place.
+ /// take its place. The name is determined by calling
+ /// getInstructionOpcodeHash.
unsigned createVirtualRegister(unsigned VReg);
- /// renameVRegs - For a given MachineBasicBlock, scan for side-effecting
- /// instructions, walk the def-use from each side-effecting root (in sorted
- /// root order) and rename the encountered vregs in the def-use graph in a
- /// canonical ordering. This method maintains book keeping for which vregs
- /// were already renamed in RenamedInOtherBB.
- // @return changed
- bool renameVRegs(MachineBasicBlock *MBB);
+ /// Create a vreg with name and return it.
+ unsigned createVirtualRegisterWithLowerName(unsigned VReg, StringRef Name);
+ /// Linearly traverse the MachineBasicBlock and rename each instruction's
+ /// vreg definition based on the semantics of the instruction.
+  /// Names are of the form bb<BBNum>_hash_[0-9]+.
+ bool renameInstsInMBB(MachineBasicBlock *MBB);
+
+public:
+ VRegRenamer() = delete;
+ VRegRenamer(MachineRegisterInfo &MRI) : MRI(MRI) {}
+
+  /// Same as renameInstsInMBB, but first records BBNum, which is used
+  /// as the prefix for the vreg names.
+ bool renameVRegs(MachineBasicBlock *MBB, unsigned BBNum) {
+ CurrentBBNumber = BBNum;
+ return renameInstsInMBB(MBB);
+ }
};
} // namespace llvm
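
A minimal usage sketch of the new public surface (not part of this patch; MF is assumed to be a MachineFunction the caller already holds):

  VRegRenamer Renamer(MF.getRegInfo());
  bool Changed = false;
  unsigned BBNum = 0;
  // Rename every block's vreg defs, prefixing names with the block number.
  for (MachineBasicBlock &MBB : MF)
    Changed |= Renamer.renameVRegs(&MBB, BBNum++);
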
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 854bef3aab05..f433c4b6c90b 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1395,8 +1395,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
--N;
- MachineOperandIteratorBase::PhysRegInfo Info =
- ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
+ PhysRegInfo Info = AnalyzePhysRegInBundle(*I, Reg, TRI);
// Register is live when we read it here.
if (Info.Read)
@@ -1434,8 +1433,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
--N;
- MachineOperandIteratorBase::PhysRegInfo Info =
- ConstMIOperands(*I).analyzePhysReg(Reg, TRI);
+ PhysRegInfo Info = AnalyzePhysRegInBundle(*I, Reg, TRI);
// Defs happen after uses so they take precedence if both are present.
@@ -1462,6 +1460,11 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI,
} while (I != begin() && N > 0);
}
+ // If all the instructions before this in the block are debug instructions,
+ // skip over them.
+ while (I != begin() && std::prev(I)->isDebugInstr())
+ --I;
+
// Did we get to the start of the block?
if (I == begin()) {
// If so, the register's state is definitely defined by the live-in state.
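
In effect: when everything between the scan point and the block start is debug instructions (e.g. DBG_VALUEs), they are now stepped over so the query reaches the definitive live-in check at begin() instead of stopping short.
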
diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 53a35b7e89c2..d8ea3e0b9cf6 100644
--- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
@@ -172,6 +173,13 @@ MachineBlockFrequencyInfo::MachineBlockFrequencyInfo()
initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
}
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo(
+ MachineFunction &F,
+ MachineBranchProbabilityInfo &MBPI,
+ MachineLoopInfo &MLI) : MachineFunctionPass(ID) {
+ calculate(F, MBPI, MLI);
+}
+
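
A hedged sketch of what the new constructor enables — computing frequencies on demand outside the pass manager; MF, MBPI, and MLI are assumed to be already-computed objects in the caller's scope:

  // Sketch only: runs calculate() immediately rather than via runOnMachineFunction.
  MachineBlockFrequencyInfo MBFI(MF, MBPI, MLI);
  uint64_t EntryFreq = MBFI.getBlockFreq(&MF.front()).getFrequency();
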
MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default;
void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index ac19bc0bd8ea..30b98ec88c24 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -41,6 +42,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -48,6 +50,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BlockFrequency.h"
@@ -362,6 +365,8 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// A handle to the post dominator tree.
MachinePostDominatorTree *MPDT;
+ ProfileSummaryInfo *PSI;
+
/// Duplicator used to duplicate tails during placement.
///
/// Placement decisions can open up new tail duplication opportunities, but
@@ -537,6 +542,7 @@ public:
if (TailDupPlacement)
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -554,6 +560,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
@@ -1073,6 +1080,11 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
if (!shouldTailDuplicate(Succ))
return false;
+ // The result of canTailDuplicate.
+ bool Duplicate = true;
+  // Number of possible duplications.
+ unsigned int NumDup = 0;
+
// For CFG checking.
SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
BB->succ_end());
@@ -1119,9 +1131,50 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
// to trellises created by tail-duplication, so we just look for the
// CFG.
continue;
- return false;
+ Duplicate = false;
+ continue;
}
+ NumDup++;
}
+
+ // No possible duplication in current filter set.
+ if (NumDup == 0)
+ return false;
+
+ // This is mainly for function exit BB.
+  // The integrated tail duplication is really designed for increasing
+  // fallthrough from predecessors of Succ to Succ's successors. We may need
+  // other mechanisms to handle different cases.
+ if (Succ->succ_size() == 0)
+ return true;
+
+ // Plus the already placed predecessor.
+ NumDup++;
+
+ // If the duplication candidate has more unplaced predecessors than
+ // successors, the extra duplication can't bring more fallthrough.
+ //
+ // Pred1 Pred2 Pred3
+ // \ | /
+ // \ | /
+ // \ | /
+ // Dup
+ // / \
+ // / \
+ // Succ1 Succ2
+ //
+  // In this example, Dup has 2 successors and 3 predecessors; duplicating Dup
+  // can increase the fallthrough from Pred1 to Succ1 and from Pred2 to Succ2,
+  // but the duplication into Pred3 cannot increase fallthrough.
+ //
+  // A small amount of extra duplication may not hurt too much. We need a
+  // better heuristic to handle it.
+ //
+ // FIXME: we should selectively tail duplicate a BB into part of its
+ // predecessors.
+ if ((NumDup > Succ->succ_size()) || !Duplicate)
+ return false;
+
return true;
}
@@ -1417,9 +1470,10 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
bool BadCFGConflict = false;
for (MachineBasicBlock *Pred : Succ->predecessors()) {
- if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (Pred == Succ || PredChain == &SuccChain ||
(BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain ||
+ PredChain == &Chain || Pred != *std::prev(PredChain->end()) ||
// This check is redundant except for look ahead. This function is
// called for lookahead by isProfitableToTailDup when BB hasn't been
// placed yet.
@@ -1721,7 +1775,9 @@ void MachineBlockPlacement::buildChain(
MachineBasicBlock* BestSucc = Result.BB;
bool ShouldTailDup = Result.ShouldTailDup;
if (allowTailDupPlacement())
- ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
+ ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc,
+ Chain,
+ BlockFilter));
// If an immediate successor isn't available, look for the best viable
// block among those we've identified as not violating the loop's CFG at
@@ -2025,7 +2081,10 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
// i.e. when the layout predecessor does not fallthrough to the loop header.
// In practice this never happens though: there always seems to be a preheader
// that can fallthrough and that is also placed before the header.
- if (F->getFunction().hasOptSize())
+ bool OptForSize = F->getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(L.getHeader(), PSI,
+ &MBFI->getMBFI());
+ if (OptForSize)
return L.getHeader();
MachineBasicBlock *OldTop = nullptr;
@@ -2781,6 +2840,11 @@ void MachineBlockPlacement::alignBlocks() {
if (Freq < (LoopHeaderFreq * ColdProb))
continue;
+    // If the global profile indicates so, don't align it.
+ if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) &&
+ !TLI->alignLoopsWithOptSize())
+ continue;
+
// Check for the existence of a non-layout predecessor which would benefit
// from aligning this block.
MachineBasicBlock *LayoutPred =
@@ -2988,6 +3052,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
MPDT = nullptr;
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
// Initialize PreferredLoopExit to nullptr here since it may never be set if
// there are no MachineLoops.
@@ -3018,10 +3083,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (allowTailDupPlacement()) {
MPDT = &getAnalysis<MachinePostDominatorTree>();
- if (MF.getFunction().hasOptSize())
+ bool OptForSize = MF.getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI());
+ if (OptForSize)
TailDupSize = 1;
bool PreRegAlloc = false;
- TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize);
+ TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI,
+ /* LayoutMode */ true, TailDupSize);
precomputeTriangleChains();
}
@@ -3037,7 +3105,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (MF.size() > 3 && EnableTailMerge) {
unsigned TailMergeSize = TailDupSize + 1;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
- *MBPI, TailMergeSize);
+ *MBPI, PSI, TailMergeSize);
auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index d2277ce51746..f1d68c79a212 100644
--- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -13,6 +13,8 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -37,6 +39,12 @@ cl::opt<unsigned> ProfileLikelyProb(
char MachineBranchProbabilityInfo::ID = 0;
+MachineBranchProbabilityInfo::MachineBranchProbabilityInfo()
+ : ImmutablePass(ID) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeMachineBranchProbabilityInfoPass(Registry);
+}
+
void MachineBranchProbabilityInfo::anchor() {}
BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index d9bd32b2fbab..9561a06ce8df 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index e9f462fd1b37..73895bdf834f 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -12,17 +12,21 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -66,6 +70,8 @@ class MachineCombiner : public MachineFunctionPass {
MachineLoopInfo *MLI; // Current MachineLoopInfo
MachineTraceMetrics *Traces;
MachineTraceMetrics::Ensemble *MinInstr;
+ MachineBlockFrequencyInfo *MBFI;
+ ProfileSummaryInfo *PSI;
TargetSchedModel TSchedModel;
@@ -82,7 +88,7 @@ public:
StringRef getPassName() const override { return "Machine InstCombiner"; }
private:
- bool doSubstitute(unsigned NewSize, unsigned OldSize);
+ bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize);
bool combineInstructions(MachineBasicBlock *);
MachineInstr *getOperandDef(const MachineOperand &MO);
unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
@@ -131,6 +137,8 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<MachineTraceMetrics>();
AU.addPreserved<MachineTraceMetrics>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -408,8 +416,9 @@ bool MachineCombiner::preservesResourceLen(
/// \returns true when new instruction sequence should be generated
/// independent if it lengthens critical path or not
-bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
- if (OptSize && (NewSize < OldSize))
+bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize,
+ bool OptForSize) {
+ if (OptForSize && (NewSize < OldSize))
return true;
if (!TSchedModel.hasInstrSchedModelOrItineraries())
return true;
@@ -507,6 +516,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
SparseSet<LiveRegUnit> RegUnits;
RegUnits.setUniverse(TRI->getNumRegUnits());
+ bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
+
while (BlockIter != MBB->end()) {
auto &MI = *BlockIter++;
SmallVector<MachineCombinerPattern, 16> Patterns;
@@ -583,7 +594,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
// resource pressure.
- if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
+ if (SubstituteAlways ||
+ doSubstitute(NewInstCount, OldInstCount, OptForSize)) {
insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
RegUnits, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
@@ -638,6 +650,10 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
MLI = &getAnalysis<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ MBFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
MinInstr = nullptr;
OptSize = MF.getFunction().hasOptSize();
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index ebe76e31dca9..c316b167059b 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -37,6 +37,15 @@
// ... // No clobber of %R0
// %R1 = COPY %R0 <<< Removed
//
+// or
+//
+// $R0 = OP ...
+// ... // No read/clobber of $R0 and $R1
+// $R1 = COPY $R0 // $R0 is killed
+// Replace $R0 with $R1 and remove the COPY
+// $R1 = OP ...
+// ...
+//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
@@ -54,6 +63,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -68,6 +78,7 @@ using namespace llvm;
STATISTIC(NumDeletes, "Number of dead copies deleted");
STATISTIC(NumCopyForwards, "Number of copy uses forwarded");
+STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated");
DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
"Controls which register COPYs are forwarded");
@@ -97,6 +108,28 @@ public:
}
}
+ /// Remove register from copy maps.
+ void invalidateRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
+    // Since Reg might be a subreg of some registers, only invalidating Reg is
+    // not enough. We have to find the COPY that defines Reg, or the registers
+    // defined by Reg, and invalidate all of them.
+ DenseSet<unsigned> RegsToInvalidate{Reg};
+ for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
+ auto I = Copies.find(*RUI);
+ if (I != Copies.end()) {
+ if (MachineInstr *MI = I->second.MI) {
+ RegsToInvalidate.insert(MI->getOperand(0).getReg());
+ RegsToInvalidate.insert(MI->getOperand(1).getReg());
+ }
+ RegsToInvalidate.insert(I->second.DefRegs.begin(),
+ I->second.DefRegs.end());
+ }
+ }
+ for (unsigned InvalidReg : RegsToInvalidate)
+ for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI)
+ Copies.erase(*RUI);
+ }
+
/// Clobber a single register, removing it from the tracker's copy maps.
void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) {
for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
@@ -150,6 +183,38 @@ public:
return CI->second.MI;
}
+ MachineInstr *findCopyDefViaUnit(unsigned RegUnit,
+ const TargetRegisterInfo &TRI) {
+ auto CI = Copies.find(RegUnit);
+ if (CI == Copies.end())
+ return nullptr;
+ if (CI->second.DefRegs.size() != 1)
+ return nullptr;
+ MCRegUnitIterator RUI(CI->second.DefRegs[0], &TRI);
+ return findCopyForUnit(*RUI, TRI, true);
+ }
+
+ MachineInstr *findAvailBackwardCopy(MachineInstr &I, unsigned Reg,
+ const TargetRegisterInfo &TRI) {
+ MCRegUnitIterator RUI(Reg, &TRI);
+ MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI);
+ if (!AvailCopy ||
+ !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg))
+ return nullptr;
+
+ Register AvailSrc = AvailCopy->getOperand(1).getReg();
+ Register AvailDef = AvailCopy->getOperand(0).getReg();
+ for (const MachineInstr &MI :
+ make_range(AvailCopy->getReverseIterator(), I.getReverseIterator()))
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isRegMask())
+ // FIXME: Shall we simultaneously invalidate AvailSrc or AvailDef?
+ if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef))
+ return nullptr;
+
+ return AvailCopy;
+ }
+
MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg,
const TargetRegisterInfo &TRI) {
// We check the first RegUnit here, since we'll only be interested in the
@@ -210,11 +275,16 @@ private:
void ClobberRegister(unsigned Reg);
void ReadRegister(unsigned Reg, MachineInstr &Reader,
DebugType DT);
- void CopyPropagateBlock(MachineBasicBlock &MBB);
+ void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
+ void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
void forwardUses(MachineInstr &MI);
+ void propagateDefs(MachineInstr &MI);
bool isForwardableRegClassCopy(const MachineInstr &Copy,
const MachineInstr &UseI, unsigned UseIdx);
+ bool isBackwardPropagatableRegClassCopy(const MachineInstr &Copy,
+ const MachineInstr &UseI,
+ unsigned UseIdx);
bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
/// Candidates for deletion.
@@ -312,6 +382,19 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
return true;
}
+bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy(
+ const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) {
+ Register Def = Copy.getOperand(0).getReg();
+
+ if (const TargetRegisterClass *URC =
+ UseI.getRegClassConstraint(UseIdx, TII, TRI))
+ return URC->contains(Def);
+
+ // We don't process further if UseI is a COPY, since forward copy propagation
+ // should handle that.
+ return false;
+}
+
/// Decide whether we should forward the source of \param Copy to its use in
/// \param UseI based on the physical register class constraints of the opcode
/// and avoiding introducing more cross-class COPYs.
@@ -432,6 +515,15 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (hasImplicitOverlap(MI, MOUse))
continue;
+ // Check that the instruction is not a copy that partially overwrites the
+ // original copy source that we are about to use. The tracker mechanism
+ // cannot cope with that.
+ if (MI.isCopy() && MI.modifiesRegister(CopySrcReg, TRI) &&
+ !MI.definesRegister(CopySrcReg)) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI);
+ continue;
+ }
+
if (!DebugCounter::shouldExecute(FwdCounter)) {
LLVM_DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n "
<< MI);
@@ -458,8 +550,9 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
}
}
-void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
- LLVM_DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");
+void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName()
+ << "\n");
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
MachineInstr *MI = &*I;
@@ -637,6 +730,137 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
Tracker.clear();
}
+static bool isBackwardPropagatableCopy(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ assert(MI.isCopy() && "MI is expected to be a COPY");
+ Register Def = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+
+ if (!Def || !Src)
+ return false;
+
+ if (MRI.isReserved(Def) || MRI.isReserved(Src))
+ return false;
+
+ return MI.getOperand(1).isRenamable() && MI.getOperand(1).isKill();
+}
+
+void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
+ if (!Tracker.hasAnyCopies())
+ return;
+
+ for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx != OpEnd;
+ ++OpIdx) {
+ MachineOperand &MODef = MI.getOperand(OpIdx);
+
+ if (!MODef.isReg() || MODef.isUse())
+ continue;
+
+ // Ignore non-trivial cases.
+ if (MODef.isTied() || MODef.isUndef() || MODef.isImplicit())
+ continue;
+
+ if (!MODef.getReg())
+ continue;
+
+    // We only handle registers that come from vregs (i.e. are renamable).
+ if (!MODef.isRenamable())
+ continue;
+
+ MachineInstr *Copy =
+ Tracker.findAvailBackwardCopy(MI, MODef.getReg(), *TRI);
+ if (!Copy)
+ continue;
+
+ Register Def = Copy->getOperand(0).getReg();
+ Register Src = Copy->getOperand(1).getReg();
+
+ if (MODef.getReg() != Src)
+ continue;
+
+ if (!isBackwardPropagatableRegClassCopy(*Copy, MI, OpIdx))
+ continue;
+
+ if (hasImplicitOverlap(MI, MODef))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI)
+ << "\n with " << printReg(Def, TRI) << "\n in "
+ << MI << " from " << *Copy);
+
+ MODef.setReg(Def);
+ MODef.setIsRenamable(Copy->getOperand(0).isRenamable());
+
+ LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
+ MaybeDeadCopies.insert(Copy);
+ Changed = true;
+ ++NumCopyBackwardPropagated;
+ }
+}
+
+void MachineCopyPropagation::BackwardCopyPropagateBlock(
+ MachineBasicBlock &MBB) {
+ LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName()
+ << "\n");
+
+ for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
+ I != E;) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ // Ignore non-trivial COPYs.
+ if (MI->isCopy() && MI->getNumOperands() == 2 &&
+ !TRI->regsOverlap(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getReg())) {
+
+ Register Def = MI->getOperand(0).getReg();
+ Register Src = MI->getOperand(1).getReg();
+
+ // Unlike forward cp, we don't invoke propagateDefs here,
+ // just let forward cp do COPY-to-COPY propagation.
+ if (isBackwardPropagatableCopy(*MI, *MRI)) {
+ Tracker.invalidateRegister(Src, *TRI);
+ Tracker.invalidateRegister(Def, *TRI);
+ Tracker.trackCopy(MI, *TRI);
+ continue;
+ }
+ }
+
+ // Invalidate any earlyclobber regs first.
+ for (const MachineOperand &MO : MI->operands())
+ if (MO.isReg() && MO.isEarlyClobber()) {
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ Tracker.invalidateRegister(Reg, *TRI);
+ }
+
+ propagateDefs(*MI);
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+
+ if (!MO.getReg())
+ continue;
+
+ if (MO.isDef())
+ Tracker.invalidateRegister(MO.getReg(), *TRI);
+
+ if (MO.readsReg())
+ Tracker.invalidateRegister(MO.getReg(), *TRI);
+ }
+ }
+
+ for (auto *Copy : MaybeDeadCopies) {
+ Copy->eraseFromParent();
+ ++NumDeletes;
+ }
+
+ MaybeDeadCopies.clear();
+ CopyDbgUsers.clear();
+ Tracker.clear();
+}
+
bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -647,8 +871,10 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
MRI = &MF.getRegInfo();
- for (MachineBasicBlock &MBB : MF)
- CopyPropagateBlock(MBB);
+ for (MachineBasicBlock &MBB : MF) {
+ BackwardCopyPropagateBlock(MBB);
+ ForwardCopyPropagateBlock(MBB);
+ }
return Changed;
}
diff --git a/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
index 6704298c17d6..6ddb1758719b 100644
--- a/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/llvm/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -10,6 +10,7 @@
#include "llvm/Analysis/DominanceFrontierImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MachineDominators.cpp b/llvm/lib/CodeGen/MachineDominators.cpp
index 706c706d7527..c8845d838282 100644
--- a/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/llvm/lib/CodeGen/MachineDominators.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -49,11 +50,15 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ calculate(F);
+ return false;
+}
+
+void MachineDominatorTree::calculate(MachineFunction &F) {
CriticalEdgesToSplit.clear();
NewBBs.clear();
DT.reset(new DomTreeBase<MachineBasicBlock>());
DT->recalculate(F);
- return false;
}
MachineDominatorTree::MachineDominatorTree()
diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp
index 604f5145b1a0..22ab2c7a6d77 100644
--- a/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -133,11 +133,11 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
return BV;
}
-unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
+uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
unsigned MaxAlign = getMaxAlignment();
- int Offset = 0;
+ int64_t Offset = 0;
// This code is very, very similar to PEI::calculateFrameObjectOffsets().
// It really should be refactored to share code. Until then, changes
@@ -147,7 +147,7 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
// Only estimate stack size of default stack.
if (getStackID(i) != TargetStackID::Default)
continue;
- int FixedOff = -getObjectOffset(i);
+ int64_t FixedOff = -getObjectOffset(i);
if (FixedOff > Offset) Offset = FixedOff;
}
for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
@@ -183,7 +183,7 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
unsigned AlignMask = StackAlign - 1;
Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
- return (unsigned)Offset;
+ return (uint64_t)Offset;
}
void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) {
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 7d2ee230ca9f..4612690644fe 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -270,6 +270,21 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
return JumpTableInfo;
}
+DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const {
+ // TODO: Should probably avoid the connection to the IR and store directly
+ // in the MachineFunction.
+ Attribute Attr = F.getFnAttribute("denormal-fp-math");
+
+ // FIXME: This should assume IEEE behavior on an unspecified
+ // attribute. However, the one current user incorrectly assumes a non-IEEE
+ // target by default.
+ StringRef Val = Attr.getValueAsString();
+ if (Val.empty())
+ return DenormalMode::Invalid;
+
+ return parseDenormalFPAttribute(Val);
+}
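
A small usage sketch under the semantics above (note the FIXME: an absent attribute currently yields DenormalMode::Invalid, not IEEE):

  // Sketch: ask which denormal handling this function was compiled with.
  DenormalMode Mode = MF.getDenormalMode(APFloat::IEEEsingle());
  if (Mode == DenormalMode::PreserveSign) {
    // Denormals may be flushed to zero with the sign bit preserved.
  }
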
+
/// Should we be emitting segmented stack stuff for the function
bool MachineFunction::shouldSplitStack() const {
return getFunction().hasFnAttribute("split-stack");
@@ -447,12 +462,11 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MMO->getOrdering(), MMO->getFailureOrdering());
}
-MachineInstr::ExtraInfo *
-MachineFunction::createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs,
- MCSymbol *PreInstrSymbol,
- MCSymbol *PostInstrSymbol) {
+MachineInstr::ExtraInfo *MachineFunction::createMIExtraInfo(
+ ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol,
+ MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker) {
return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol,
- PostInstrSymbol);
+ PostInstrSymbol, HeapAllocMarker);
}
const char *MachineFunction::createExternalSymbolName(StringRef Name) {
@@ -470,6 +484,12 @@ uint32_t *MachineFunction::allocateRegMask() {
return Mask;
}
+ArrayRef<int> MachineFunction::allocateShuffleMask(ArrayRef<int> Mask) {
+  int *AllocMask = Allocator.Allocate<int>(Mask.size());
+ copy(Mask, AllocMask);
+ return {AllocMask, Mask.size()};
+}
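
A usage sketch (identifiers hypothetical): the mask is copied into the function's allocator, so the returned ArrayRef stays valid for the lifetime of MF even if the caller's buffer is temporary:

  int Elts[] = {0, 2, 1, 3};                         // caller-owned scratch mask
  ArrayRef<int> Mask = MF.allocateShuffleMask(Elts); // stable, MF-owned copy
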
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MachineFunction::dump() const {
print(dbgs());
@@ -521,6 +541,13 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
OS << "\n# End machine code for function " << getName() << ".\n\n";
}
+/// True if this function needs frame moves for debug or exceptions.
+bool MachineFunction::needsFrameMoves() const {
+ return getMMI().hasDebugInfo() ||
+ getTarget().Options.ForceDwarfFrameSection ||
+ F.needsUnwindTableEntry();
+}
+
namespace llvm {
template<>
@@ -824,15 +851,13 @@ try_next:;
return FilterID;
}
-void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I,
- const MDNode *MD) {
- MCSymbol *BeginLabel = Ctx.createTempSymbol("heapallocsite", true);
- MCSymbol *EndLabel = Ctx.createTempSymbol("heapallocsite", true);
- I->setPreInstrSymbol(*this, BeginLabel);
- I->setPostInstrSymbol(*this, EndLabel);
+MachineFunction::CallSiteInfoMap::iterator
+MachineFunction::getCallSiteInfo(const MachineInstr *MI) {
+ assert(MI->isCall() && "Call site info refers only to call instructions!");
- const DIType *DI = dyn_cast<DIType>(MD);
- CodeViewHeapAllocSites.push_back(std::make_tuple(BeginLabel, EndLabel, DI));
+ if (!Target.Options.EnableDebugEntryValues)
+ return CallSitesInfo.end();
+ return CallSitesInfo.find(MI);
}
void MachineFunction::moveCallSiteInfo(const MachineInstr *Old,
diff --git a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 0ea8975cc74c..3645a4e3466b 100644
--- a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index fec20b2b1a05..08d786f8f12c 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -187,8 +187,8 @@ static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
unsigned NumOps, MachineRegisterInfo *MRI) {
if (MRI)
return MRI->moveOperands(Dst, Src, NumOps);
-
// MachineOperand is a trivially copyable type so we can just use memmove.
+ assert(Dst && Src && "Unknown operands");
std::memmove(Dst, Src, NumOps * sizeof(MachineOperand));
}
@@ -316,27 +316,48 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
--NumOperands;
}
-void MachineInstr::dropMemRefs(MachineFunction &MF) {
- if (memoperands_empty())
- return;
+void MachineInstr::setExtraInfo(MachineFunction &MF,
+ ArrayRef<MachineMemOperand *> MMOs,
+ MCSymbol *PreInstrSymbol,
+ MCSymbol *PostInstrSymbol,
+ MDNode *HeapAllocMarker) {
+ bool HasPreInstrSymbol = PreInstrSymbol != nullptr;
+ bool HasPostInstrSymbol = PostInstrSymbol != nullptr;
+ bool HasHeapAllocMarker = HeapAllocMarker != nullptr;
+ int NumPointers =
+ MMOs.size() + HasPreInstrSymbol + HasPostInstrSymbol + HasHeapAllocMarker;
- // See if we can just drop all of our extra info.
- if (!getPreInstrSymbol() && !getPostInstrSymbol()) {
+ // Drop all extra info if there is none.
+ if (NumPointers <= 0) {
Info.clear();
return;
}
- if (!getPostInstrSymbol()) {
- Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol());
+
+ // If more than one pointer, then store out of line. Store heap alloc markers
+ // out of line because PointerSumType cannot hold more than 4 tag types with
+ // 32-bit pointers.
+ // FIXME: Maybe we should make the symbols in the extra info mutable?
+ else if (NumPointers > 1 || HasHeapAllocMarker) {
+ Info.set<EIIK_OutOfLine>(MF.createMIExtraInfo(
+ MMOs, PreInstrSymbol, PostInstrSymbol, HeapAllocMarker));
return;
}
- if (!getPreInstrSymbol()) {
- Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol());
+
+ // Otherwise store the single pointer inline.
+ if (HasPreInstrSymbol)
+ Info.set<EIIK_PreInstrSymbol>(PreInstrSymbol);
+ else if (HasPostInstrSymbol)
+ Info.set<EIIK_PostInstrSymbol>(PostInstrSymbol);
+ else
+ Info.set<EIIK_MMO>(MMOs[0]);
+}
+
+void MachineInstr::dropMemRefs(MachineFunction &MF) {
+ if (memoperands_empty())
return;
- }
- // Otherwise allocate a fresh extra info with just these symbols.
- Info.set<EIIK_OutOfLine>(
- MF.createMIExtraInfo({}, getPreInstrSymbol(), getPostInstrSymbol()));
+ setExtraInfo(MF, {}, getPreInstrSymbol(), getPostInstrSymbol(),
+ getHeapAllocMarker());
}
void MachineInstr::setMemRefs(MachineFunction &MF,
@@ -346,15 +367,8 @@ void MachineInstr::setMemRefs(MachineFunction &MF,
return;
}
- // Try to store a single MMO inline.
- if (MMOs.size() == 1 && !getPreInstrSymbol() && !getPostInstrSymbol()) {
- Info.set<EIIK_MMO>(MMOs[0]);
- return;
- }
-
- // Otherwise create an extra info struct with all of our info.
- Info.set<EIIK_OutOfLine>(
- MF.createMIExtraInfo(MMOs, getPreInstrSymbol(), getPostInstrSymbol()));
+ setExtraInfo(MF, MMOs, getPreInstrSymbol(), getPostInstrSymbol(),
+ getHeapAllocMarker());
}
void MachineInstr::addMemOperand(MachineFunction &MF,
@@ -376,7 +390,8 @@ void MachineInstr::cloneMemRefs(MachineFunction &MF, const MachineInstr &MI) {
// instruction. We can do this whenever the pre- and post-instruction symbols
// are the same (including null).
if (getPreInstrSymbol() == MI.getPreInstrSymbol() &&
- getPostInstrSymbol() == MI.getPostInstrSymbol()) {
+ getPostInstrSymbol() == MI.getPostInstrSymbol() &&
+ getHeapAllocMarker() == MI.getHeapAllocMarker()) {
Info = MI.Info;
return;
}
@@ -450,67 +465,42 @@ void MachineInstr::cloneMergedMemRefs(MachineFunction &MF,
}
void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
- MCSymbol *OldSymbol = getPreInstrSymbol();
- if (OldSymbol == Symbol)
+ // Do nothing if old and new symbols are the same.
+ if (Symbol == getPreInstrSymbol())
return;
- if (OldSymbol && !Symbol) {
- // We're removing a symbol rather than adding one. Try to clean up any
- // extra info carried around.
- if (Info.is<EIIK_PreInstrSymbol>()) {
- Info.clear();
- return;
- }
- if (memoperands_empty()) {
- assert(getPostInstrSymbol() &&
- "Should never have only a single symbol allocated out-of-line!");
- Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol());
- return;
- }
-
- // Otherwise fallback on the generic update.
- } else if (!Info || Info.is<EIIK_PreInstrSymbol>()) {
- // If we don't have any other extra info, we can store this inline.
- Info.set<EIIK_PreInstrSymbol>(Symbol);
+ // If there was only one symbol and we're removing it, just clear info.
+ if (!Symbol && Info.is<EIIK_PreInstrSymbol>()) {
+ Info.clear();
return;
}
- // Otherwise, allocate a full new set of extra info.
- // FIXME: Maybe we should make the symbols in the extra info mutable?
- Info.set<EIIK_OutOfLine>(
- MF.createMIExtraInfo(memoperands(), Symbol, getPostInstrSymbol()));
+ setExtraInfo(MF, memoperands(), Symbol, getPostInstrSymbol(),
+ getHeapAllocMarker());
}
void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
- MCSymbol *OldSymbol = getPostInstrSymbol();
- if (OldSymbol == Symbol)
+ // Do nothing if old and new symbols are the same.
+ if (Symbol == getPostInstrSymbol())
return;
- if (OldSymbol && !Symbol) {
- // We're removing a symbol rather than adding one. Try to clean up any
- // extra info carried around.
- if (Info.is<EIIK_PostInstrSymbol>()) {
- Info.clear();
- return;
- }
- if (memoperands_empty()) {
- assert(getPreInstrSymbol() &&
- "Should never have only a single symbol allocated out-of-line!");
- Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol());
- return;
- }
-
- // Otherwise fallback on the generic update.
- } else if (!Info || Info.is<EIIK_PostInstrSymbol>()) {
- // If we don't have any other extra info, we can store this inline.
- Info.set<EIIK_PostInstrSymbol>(Symbol);
+ // If there was only one symbol and we're removing it, just clear info.
+ if (!Symbol && Info.is<EIIK_PostInstrSymbol>()) {
+ Info.clear();
return;
}
- // Otherwise, allocate a full new set of extra info.
- // FIXME: Maybe we should make the symbols in the extra info mutable?
- Info.set<EIIK_OutOfLine>(
- MF.createMIExtraInfo(memoperands(), getPreInstrSymbol(), Symbol));
+ setExtraInfo(MF, memoperands(), getPreInstrSymbol(), Symbol,
+ getHeapAllocMarker());
+}
+
+void MachineInstr::setHeapAllocMarker(MachineFunction &MF, MDNode *Marker) {
+  // Do nothing if the old and new markers are the same.
+ if (Marker == getHeapAllocMarker())
+ return;
+
+ setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(),
+ Marker);
}
void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
@@ -524,6 +514,7 @@ void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
setPreInstrSymbol(MF, MI.getPreInstrSymbol());
setPostInstrSymbol(MF, MI.getPostInstrSymbol());
+ setHeapAllocMarker(MF, MI.getHeapAllocMarker());
}
uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
@@ -1515,7 +1506,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(StartOp);
- MO.print(OS, MST, TypeToPrint, /*PrintDef=*/false, IsStandalone,
+ MO.print(OS, MST, TypeToPrint, StartOp, /*PrintDef=*/false, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
++StartOp;
}
@@ -1547,8 +1538,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "nsw ";
if (getFlag(MachineInstr::IsExact))
OS << "exact ";
- if (getFlag(MachineInstr::FPExcept))
- OS << "fpexcept ";
+ if (getFlag(MachineInstr::NoFPExcept))
+ OS << "nofpexcept ";
// Print the opcode name.
if (TII)
@@ -1570,7 +1561,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
const unsigned OpIdx = InlineAsm::MIOp_AsmString;
LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx);
- getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
+ getOperand(OpIdx).print(OS, MST, TypeToPrint, OpIdx, /*PrintDef=*/true, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI,
IntrinsicInfo);
@@ -1609,7 +1600,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
else {
LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(i);
- MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
+ MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
}
} else if (isDebugLabel() && MO.isMetadata()) {
@@ -1620,7 +1611,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
else {
LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{};
unsigned TiedOperandIdx = getTiedOperandIdx(i);
- MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
+ MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
}
} else if (i == AsmDescOp && MO.isImm()) {
@@ -1687,7 +1678,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (MO.isImm() && isOperandSubregIdx(i))
MachineOperand::printSubRegIdx(OS, MO.getImm(), TRI);
else
- MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone,
+ MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone,
ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo);
}
}
@@ -1710,6 +1701,14 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << " post-instr-symbol ";
MachineOperand::printSymbol(OS, *PostInstrSymbol);
}
+ if (MDNode *HeapAllocMarker = getHeapAllocMarker()) {
+ if (!FirstOp) {
+ FirstOp = false;
+ OS << ',';
+ }
+ OS << " heap-alloc-marker ";
+ HeapAllocMarker->printAsOperand(OS, MST);
+ }
if (!SkipDebugLoc) {
if (const DebugLoc &DL = getDebugLoc()) {
@@ -1978,7 +1977,7 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs,
unsigned
MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
// Build up a buffer of hash code components.
- SmallVector<size_t, 8> HashComponents;
+ SmallVector<size_t, 16> HashComponents;
HashComponents.reserve(MI->getNumOperands() + 1);
HashComponents.push_back(MI->getOpcode());
for (const MachineOperand &MO : MI->operands()) {
diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp
index feb849ced353..94865b0e9031 100644
--- a/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
#include <utility>
using namespace llvm;
@@ -277,22 +278,18 @@ bool llvm::finalizeBundles(MachineFunction &MF) {
return Changed;
}
-//===----------------------------------------------------------------------===//
-// MachineOperand iterator
-//===----------------------------------------------------------------------===//
-
-MachineOperandIteratorBase::VirtRegInfo
-MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
- SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) {
- VirtRegInfo RI = { false, false, false };
- for(; isValid(); ++*this) {
- MachineOperand &MO = deref();
+VirtRegInfo llvm::AnalyzeVirtRegInBundle(
+ MachineInstr &MI, unsigned Reg,
+ SmallVectorImpl<std::pair<MachineInstr *, unsigned>> *Ops) {
+ VirtRegInfo RI = {false, false, false};
+ for (MIBundleOperands O(MI); O.isValid(); ++O) {
+ MachineOperand &MO = *O;
if (!MO.isReg() || MO.getReg() != Reg)
continue;
// Remember each (MI, OpNo) that refers to Reg.
if (Ops)
- Ops->push_back(std::make_pair(MO.getParent(), getOperandNo()));
+ Ops->push_back(std::make_pair(MO.getParent(), O.getOperandNo()));
// Both defs and uses can read virtual registers.
if (MO.readsReg()) {
@@ -304,22 +301,22 @@ MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
// Only defs can write.
if (MO.isDef())
RI.Writes = true;
- else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo()))
+ else if (!RI.Tied &&
+ MO.getParent()->isRegTiedToDefOperand(O.getOperandNo()))
RI.Tied = true;
}
return RI;
}
-MachineOperandIteratorBase::PhysRegInfo
-MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
- const TargetRegisterInfo *TRI) {
+PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, unsigned Reg,
+ const TargetRegisterInfo *TRI) {
bool AllDefsDead = true;
PhysRegInfo PRI = {false, false, false, false, false, false, false, false};
assert(Register::isPhysicalRegister(Reg) &&
"analyzePhysReg not given a physical register!");
- for (; isValid(); ++*this) {
- MachineOperand &MO = deref();
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ const MachineOperand &MO = *O;
if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) {
PRI.Clobbered = true;
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 6a898ff6ef88..462d4d3b3726 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -39,6 +40,7 @@
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -74,6 +76,27 @@ static cl::opt<bool>
HoistConstStores("hoist-const-stores",
cl::desc("Hoist invariant stores"),
cl::init(true), cl::Hidden);
+// The default threshold of 100 (i.e. if target block is 100 times hotter)
+// is based on empirical data on a single target and is subject to tuning.
+static cl::opt<unsigned>
+BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
+ cl::desc("Do not hoist instructions if target"
+ "block is N times hotter than the source."),
+ cl::init(100), cl::Hidden);
+
+enum class UseBFI { None, PGO, All };
+
+static cl::opt<UseBFI>
+DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
+ cl::desc("Disable hoisting instructions to"
+ " hotter blocks"),
+ cl::init(UseBFI::None), cl::Hidden,
+ cl::values(clEnumValN(UseBFI::None, "none",
+ "disable the feature"),
+ clEnumValN(UseBFI::PGO, "pgo",
+ "enable the feature when using profile data"),
+ clEnumValN(UseBFI::All, "all",
+ "enable the feature with/wo profile data")));
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
@@ -87,6 +110,8 @@ STATISTIC(NumPostRAHoisted,
"Number of machine instructions hoisted out of loops post regalloc");
STATISTIC(NumStoreConst,
"Number of stores of const phys reg hoisted out of loops");
+STATISTIC(NumNotHoistedDueToHotness,
+ "Number of instructions not hoisted due to block frequency");
namespace {
@@ -98,9 +123,11 @@ namespace {
MachineRegisterInfo *MRI;
TargetSchedModel SchedModel;
bool PreRegAlloc;
+ bool HasProfileData;
// Various analyses that we use...
AliasAnalysis *AA; // Alias analysis info.
+    MachineBlockFrequencyInfo *MBFI;  // Machine block frequency info
MachineLoopInfo *MLI; // Current MachineLoopInfo
MachineDominatorTree *DT; // Machine dominator tree for the cur loop
@@ -150,6 +177,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfo>();
+ if (DisableHoistingToHotterBlocks != UseBFI::None)
+ AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<MachineLoopInfo>();
@@ -245,6 +274,8 @@ namespace {
void InitCSEMap(MachineBasicBlock *BB);
+ bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
+ MachineBasicBlock *TgtBlock);
MachineBasicBlock *getCurPreheader();
};
@@ -275,6 +306,7 @@ char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE,
"Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
@@ -283,6 +315,7 @@ INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE,
INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
"Early Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
@@ -315,6 +348,7 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
SchedModel.init(&ST);
PreRegAlloc = MRI->isSSA();
+ HasProfileData = MF.getFunction().hasProfileData();
if (PreRegAlloc)
LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
@@ -333,6 +367,8 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
}
// Get our Loop information...
+ if (DisableHoistingToHotterBlocks != UseBFI::None)
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MLI = &getAnalysis<MachineLoopInfo>();
DT = &getAnalysis<MachineDominatorTree>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
@@ -1433,6 +1469,15 @@ bool MachineLICMBase::MayCSE(MachineInstr *MI) {
/// that are safe to hoist, this instruction is called to do the dirty work.
/// It returns true if the instruction is hoisted.
bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+ MachineBasicBlock *SrcBlock = MI->getParent();
+
+  // Disable instruction hoisting due to block hotness.
+ if ((DisableHoistingToHotterBlocks == UseBFI::All ||
+ (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
+ isTgtHotterThanSrc(SrcBlock, Preheader)) {
+ ++NumNotHoistedDueToHotness;
+ return false;
+ }
// First check whether we should hoist this instruction.
if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
// If not, try unfolding a hoistable load.
@@ -1526,3 +1571,21 @@ MachineBasicBlock *MachineLICMBase::getCurPreheader() {
}
return CurPreheader;
}
+
+/// Returns true if the target basic block is at least
+/// "BlockFrequencyRatioThreshold" times hotter than the source basic block.
+bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
+ MachineBasicBlock *TgtBlock) {
+  // Query the source and target basic block frequencies from MBFI.
+ uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency();
+ uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency();
+
+  // Conservatively disable hoisting if the source block frequency is zero.
+ if (!SrcBF)
+ return true;
+
+ double Ratio = (double)DstBF / SrcBF;
+
+ // Compare the block frequency ratio with the threshold
+ return Ratio > BlockFrequencyRatioThreshold;
+}
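
In effect the new gate reduces to a single frequency-ratio test. A minimal standalone sketch of that test, with the threshold passed as a plain parameter (in the pass it comes from a command-line option, not shown in this hunk):

    #include <cstdint>

    // Refuse to hoist from SrcFreq into TgtFreq when the target is more
    // than Threshold times hotter. A zero source frequency gives no
    // meaningful ratio, so the sketch vetoes the hoist, matching the
    // conservative behaviour of the patch.
    static bool tgtHotterThanSrc(uint64_t SrcFreq, uint64_t TgtFreq,
                                 double Threshold) {
      if (SrcFreq == 0)
        return true;
      return static_cast<double>(TgtFreq) / static_cast<double>(SrcFreq) >
             Threshold;
    }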
diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 3b8b430d1b0f..0c1439da9b29 100644
--- a/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -27,6 +28,9 @@ template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
char MachineLoopInfo::ID = 0;
+MachineLoopInfo::MachineLoopInfo() : MachineFunctionPass(ID) {
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+}
INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
"Machine Natural Loop Construction", true, true)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -36,11 +40,15 @@ INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
- releaseMemory();
- LI.analyze(getAnalysis<MachineDominatorTree>().getBase());
+ calculate(getAnalysis<MachineDominatorTree>());
return false;
}
+void MachineLoopInfo::calculate(MachineDominatorTree &MDT) {
+ releaseMemory();
+ LI.analyze(MDT.getBase());
+}
+
void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<MachineDominatorTree>();
diff --git a/llvm/lib/CodeGen/MachineLoopUtils.cpp b/llvm/lib/CodeGen/MachineLoopUtils.cpp
index e074b76082f0..cf30e28449cd 100644
--- a/llvm/lib/CodeGen/MachineLoopUtils.cpp
+++ b/llvm/lib/CodeGen/MachineLoopUtils.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineLoopUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -130,3 +131,14 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
return NewBB;
}
+
+bool llvm::isRegLiveInExitBlocks(MachineLoop *Loop, int PhysReg) {
+ SmallVector<MachineBasicBlock *, 4> ExitBlocks;
+ Loop->getExitBlocks(ExitBlocks);
+
+ for (auto *MBB : ExitBlocks)
+ if (MBB->isLiveIn(PhysReg))
+ return true;
+
+ return false;
+}
diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp
index e0b4e9cac229..0094a923e039 100644
--- a/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -20,8 +20,10 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
@@ -116,7 +118,17 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
BBCallbacks.back().setMap(this);
Entry.Index = BBCallbacks.size() - 1;
Entry.Fn = BB->getParent();
- Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken()));
+ MCSymbol *Sym = Context.createTempSymbol(!BB->hasAddressTaken());
+ if (Context.getObjectFileInfo()->getTargetTriple().isOSBinFormatXCOFF()) {
+ MCSymbol *FnEntryPointSym =
+ Context.lookupSymbol("." + Entry.Fn->getName());
+ assert(FnEntryPointSym && "The function entry pointer symbol should have"
+ " already been initialized.");
+ MCSectionXCOFF *Csect =
+ cast<MCSymbolXCOFF>(FnEntryPointSym)->getContainingCsect();
+ cast<MCSymbolXCOFF>(Sym)->setContainingCsect(Csect);
+ }
+ Entry.Symbols.push_back(Sym);
return Entry.Symbols;
}
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index 8b19501ec3cf..7b8f01100929 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -458,28 +459,6 @@ static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB,
OS << "<unknown>";
}
-static void printIRValueReference(raw_ostream &OS, const Value &V,
- ModuleSlotTracker &MST) {
- if (isa<GlobalValue>(V)) {
- V.printAsOperand(OS, /*PrintType=*/false, MST);
- return;
- }
- if (isa<Constant>(V)) {
- // Machine memory operands can load/store to/from constant value pointers.
- OS << '`';
- V.printAsOperand(OS, /*PrintType=*/true, MST);
- OS << '`';
- return;
- }
- OS << "%ir.";
- if (V.hasName()) {
- printLLVMNameWithoutPrefix(OS, V.getName());
- return;
- }
- int Slot = MST.getCurrentFunction() ? MST.getLocalSlot(&V) : -1;
- MachineOperand::printIRSlotNumber(OS, Slot);
-}
-
static void printSyncScope(raw_ostream &OS, const LLVMContext &Context,
SyncScope::ID SSID,
SmallVectorImpl<StringRef> &SSNs) {
@@ -734,14 +713,15 @@ void MachineOperand::print(raw_ostream &OS, LLT TypeToPrint,
const TargetIntrinsicInfo *IntrinsicInfo) const {
tryToGetTargetInfo(*this, TRI, IntrinsicInfo);
ModuleSlotTracker DummyMST(nullptr);
- print(OS, DummyMST, TypeToPrint, /*PrintDef=*/false, /*IsStandalone=*/true,
+ print(OS, DummyMST, TypeToPrint, None, /*PrintDef=*/false,
+ /*IsStandalone=*/true,
/*ShouldPrintRegisterTies=*/true,
/*TiedOperandIdx=*/0, TRI, IntrinsicInfo);
}
void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
- LLT TypeToPrint, bool PrintDef, bool IsStandalone,
- bool ShouldPrintRegisterTies,
+ LLT TypeToPrint, Optional<unsigned> OpIdx, bool PrintDef,
+ bool IsStandalone, bool ShouldPrintRegisterTies,
unsigned TiedOperandIdx,
const TargetRegisterInfo *TRI,
const TargetIntrinsicInfo *IntrinsicInfo) const {
@@ -802,9 +782,19 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << '(' << TypeToPrint << ')';
break;
}
- case MachineOperand::MO_Immediate:
- OS << getImm();
+ case MachineOperand::MO_Immediate: {
+ const MIRFormatter *Formatter = nullptr;
+ if (const MachineFunction *MF = getMFIfAvailable(*this)) {
+ const auto *TII = MF->getSubtarget().getInstrInfo();
+ assert(TII && "expected instruction info");
+ Formatter = TII->getMIRFormatter();
+ }
+ if (Formatter)
+ Formatter->printImm(OS, *getParent(), OpIdx, getImm());
+ else
+ OS << getImm();
break;
+ }
case MachineOperand::MO_CImmediate:
getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
break;
@@ -940,13 +930,13 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
case MachineOperand::MO_ShuffleMask:
OS << "shufflemask(";
- const Constant* C = getShuffleMask();
- const int NumElts = C->getType()->getVectorNumElements();
-
+ ArrayRef<int> Mask = getShuffleMask();
StringRef Separator;
- for (int I = 0; I != NumElts; ++I) {
- OS << Separator;
- C->getAggregateElement(I)->printAsOperand(OS, false, MST);
+ for (int Elt : Mask) {
+ if (Elt == -1)
+ OS << Separator << "undef";
+ else
+ OS << Separator << Elt;
Separator = ", ";
}
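
The mask is now stored as ArrayRef<int> with -1 as the undef sentinel, so the printer no longer walks a Constant. A standalone model of the loop and the output it produces (illustrative only):

    #include <cstdio>
    #include <vector>

    // -1 is the "undef" sentinel in the flattened mask representation.
    void printShuffleMask(const std::vector<int> &Mask) {
      std::printf("shufflemask(");
      const char *Sep = "";
      for (int Elt : Mask) {
        if (Elt == -1)
          std::printf("%sundef", Sep);
        else
          std::printf("%s%d", Sep, Elt);
        Sep = ", ";
      }
      std::printf(")\n");
    }

    // printShuffleMask({0, 2, -1, 3}) emits: shufflemask(0, 2, undef, 3)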
@@ -1111,7 +1101,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (const Value *Val = getValue()) {
OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
- printIRValueReference(OS, *Val, MST);
+ MIRFormatter::printIRValue(OS, *Val, MST);
} else if (const PseudoSourceValue *PVal = getPseudoValue()) {
OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into ");
assert(PVal && "Expected a pseudo source value");
@@ -1144,15 +1134,18 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
printLLVMNameWithoutPrefix(
OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
break;
- default:
+ default: {
+ const MIRFormatter *Formatter = TII->getMIRFormatter();
// FIXME: This is not necessarily the correct MIR serialization format for
// a custom pseudo source value, but at least it allows
// -print-machineinstrs to work on a target with custom pseudo source
// values.
- OS << "custom ";
- PVal->printCustom(OS);
+ OS << "custom \"";
+ Formatter->printCustomPseudoSourceValue(OS, MST, *PVal);
+ OS << '\"';
break;
}
+ }
}
MachineOperand::printOperandOffset(OS, getOffset());
if (getBaseAlignment() != getSize())
diff --git a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index b82403ae1b85..d656953f9115 100644
--- a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 8cd66825a58a..3a9104bda0d1 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -68,6 +68,7 @@
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -91,8 +92,7 @@ STATISTIC(FunctionsCreated, "Number of functions created");
// this is off by default. It should, however, be the default behaviour in
// LTO.
static cl::opt<bool> EnableLinkOnceODROutlining(
- "enable-linkonceodr-outlining",
- cl::Hidden,
+ "enable-linkonceodr-outlining", cl::Hidden,
cl::desc("Enable the machine outliner on linkonceodr functions"),
cl::init(false));
@@ -253,7 +253,7 @@ private:
/// Ukkonen's algorithm.
struct ActiveState {
/// The next node to insert at.
- SuffixTreeNode *Node;
+ SuffixTreeNode *Node = nullptr;
/// The index of the first character in the substring currently being added.
unsigned Idx = EmptyIdx;
@@ -301,8 +301,8 @@ private:
"Non-root internal nodes must have parents!");
unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
- SuffixTreeNode *N = new (NodeAllocator.Allocate())
- SuffixTreeNode(StartIdx, E, Root);
+ SuffixTreeNode *N =
+ new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, E, Root);
if (Parent)
Parent->Children[Edge] = N;
@@ -311,26 +311,31 @@ private:
/// Set the suffix indices of the leaves to the start indices of their
/// respective suffixes.
- ///
- /// \param[in] CurrNode The node currently being visited.
- /// \param CurrNodeLen The concatenation of all node sizes from the root to
- /// this node. Used to produce suffix indices.
- void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrNodeLen) {
-
- bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
-
- // Store the concatenation of lengths down from the root.
- CurrNode.ConcatLen = CurrNodeLen;
- // Traverse the tree depth-first.
- for (auto &ChildPair : CurrNode.Children) {
- assert(ChildPair.second && "Node had a null child!");
- setSuffixIndices(*ChildPair.second,
- CurrNodeLen + ChildPair.second->size());
- }
+ void setSuffixIndices() {
+ // List of nodes we need to visit along with the current length of the
+ // string.
+ std::vector<std::pair<SuffixTreeNode *, unsigned>> ToVisit;
+
+ // Current node being visited.
+ SuffixTreeNode *CurrNode = Root;
+
+ // Sum of the lengths of the nodes down the path to the current one.
+ unsigned CurrNodeLen = 0;
+ ToVisit.push_back({CurrNode, CurrNodeLen});
+ while (!ToVisit.empty()) {
+ std::tie(CurrNode, CurrNodeLen) = ToVisit.back();
+ ToVisit.pop_back();
+ CurrNode->ConcatLen = CurrNodeLen;
+ for (auto &ChildPair : CurrNode->Children) {
+ assert(ChildPair.second && "Node had a null child!");
+ ToVisit.push_back(
+ {ChildPair.second, CurrNodeLen + ChildPair.second->size()});
+ }
- // Is this node a leaf? If it is, give it a suffix index.
- if (IsLeaf)
- CurrNode.SuffixIdx = Str.size() - CurrNodeLen;
+ // No children, so we are at the end of the string.
+ if (CurrNode->Children.size() == 0 && !CurrNode->isRoot())
+ CurrNode->SuffixIdx = Str.size() - CurrNodeLen;
+ }
}
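
The rewrite swaps the recursive depth-first walk for an explicit work list of (node, accumulated length) pairs, which avoids stack overflow on deep suffix trees. The same pattern on a toy tree, as a self-contained sketch:

    #include <cstddef>
    #include <map>
    #include <utility>
    #include <vector>

    struct Node {
      std::map<char, Node *> Children;
      unsigned EdgeLen = 0;   // length of the edge leading into this node
      unsigned ConcatLen = 0; // sum of edge lengths from the root
      int SuffixIdx = -1;     // set only on leaves
    };

    void setSuffixIndices(Node *Root, std::size_t StrLen) {
      std::vector<std::pair<Node *, unsigned>> ToVisit{{Root, 0}};
      while (!ToVisit.empty()) {
        auto [N, Len] = ToVisit.back();
        ToVisit.pop_back();
        N->ConcatLen = Len;
        for (auto &Child : N->Children)
          ToVisit.push_back({Child.second, Len + Child.second->EdgeLen});
        if (N->Children.empty() && N != Root) // leaf: record suffix start
          N->SuffixIdx = static_cast<int>(StrLen - Len);
      }
    }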
/// Construct the suffix tree for the prefix of the input ending at
@@ -473,7 +478,6 @@ public:
// Keep track of the number of suffixes we have to add of the current
// prefix.
unsigned SuffixesToAdd = 0;
- Active.Node = Root;
// Construct the suffix tree iteratively on each prefix of the string.
// PfxEndIdx is the end index of the current prefix.
@@ -487,13 +491,12 @@ public:
// Set the suffix indices of each leaf.
assert(Root && "Root node can't be nullptr!");
- setSuffixIndices(*Root, 0);
+ setSuffixIndices();
}
-
/// Iterator for finding all repeated substrings in the suffix tree.
struct RepeatedSubstringIterator {
- private:
+ private:
/// The current node we're visiting.
SuffixTreeNode *N = nullptr;
@@ -595,7 +598,7 @@ public:
advance();
}
}
-};
+ };
typedef RepeatedSubstringIterator iterator;
iterator begin() { return iterator(Root); }
@@ -694,9 +697,10 @@ struct InstructionMapper {
/// IllegalInstrNumber.
///
/// \returns The integer that \p *It was mapped to.
- unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It,
- bool &CanOutlineWithPrevInstr, std::vector<unsigned> &UnsignedVecForMBB,
- std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ unsigned mapToIllegalUnsigned(
+ MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
+ std::vector<unsigned> &UnsignedVecForMBB,
+ std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
// Can't outline an illegal instruction. Set the flag.
CanOutlineWithPrevInstr = false;
@@ -764,12 +768,12 @@ struct InstructionMapper {
std::vector<unsigned> UnsignedVecForMBB;
std::vector<MachineBasicBlock::iterator> InstrListForMBB;
- for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; It++) {
+ for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; ++It) {
// Keep track of where this instruction is in the module.
switch (TII.getOutliningType(It, Flags)) {
case InstrType::Illegal:
- mapToIllegalUnsigned(It, CanOutlineWithPrevInstr,
- UnsignedVecForMBB, InstrListForMBB);
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
break;
case InstrType::Legal:
@@ -783,7 +787,7 @@ struct InstructionMapper {
// The instruction also acts as a terminator, so we have to record that
// in the string.
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
- InstrListForMBB);
+ InstrListForMBB);
break;
case InstrType::Invisible:
@@ -802,7 +806,7 @@ struct InstructionMapper {
// boundaries since the "end" is encoded uniquely and thus appears in no
// repeated substring.
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
- InstrListForMBB);
+ InstrListForMBB);
InstrList.insert(InstrList.end(), InstrListForMBB.begin(),
InstrListForMBB.end());
UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(),
@@ -888,24 +892,27 @@ struct MachineOutliner : public ModulePass {
/// \param FunctionList A list of functions to be inserted into the module.
/// \param Mapper Contains the instruction mappings for the module.
bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList,
- InstructionMapper &Mapper);
+ InstructionMapper &Mapper, unsigned &OutlinedFunctionNum);
/// Creates a function for \p OF and inserts it into the module.
MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF,
InstructionMapper &Mapper,
unsigned Name);
+ /// Calls 'doOutline()'.
+ bool runOnModule(Module &M) override;
+
/// Construct a suffix tree on the instructions in \p M and outline repeated
/// strings from that tree.
- bool runOnModule(Module &M) override;
+ bool doOutline(Module &M, unsigned &OutlinedFunctionNum);
/// Return a DISubprogram for OF if one exists, and null otherwise. Helper
/// function for remark emission.
DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) {
- DISubprogram *SP;
for (const Candidate &C : OF.Candidates)
- if (C.getMF() && (SP = C.getMF()->getFunction().getSubprogram()))
- return SP;
+ if (MachineFunction *MF = C.getMF())
+ if (DISubprogram *SP = MF->getFunction().getSubprogram())
+ return SP;
return nullptr;
}
@@ -918,15 +925,14 @@ struct MachineOutliner : public ModulePass {
/// FIXME: This should be handled by the pass manager, not the outliner.
/// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy
/// pass manager.
- void initSizeRemarkInfo(
- const Module &M, const MachineModuleInfo &MMI,
- StringMap<unsigned> &FunctionToInstrCount);
+ void initSizeRemarkInfo(const Module &M, const MachineModuleInfo &MMI,
+ StringMap<unsigned> &FunctionToInstrCount);
/// Emit the remark.
// FIXME: This should be handled by the pass manager, not the outliner.
- void emitInstrCountChangedRemark(
- const Module &M, const MachineModuleInfo &MMI,
- const StringMap<unsigned> &FunctionToInstrCount);
+ void
+ emitInstrCountChangedRemark(const Module &M, const MachineModuleInfo &MMI,
+ const StringMap<unsigned> &FunctionToInstrCount);
};
} // Anonymous namespace.
@@ -1003,13 +1009,12 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
MORE.emit(R);
}
-void
-MachineOutliner::findCandidates(InstructionMapper &Mapper,
- std::vector<OutlinedFunction> &FunctionList) {
+void MachineOutliner::findCandidates(
+ InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
FunctionList.clear();
SuffixTree ST(Mapper.UnsignedVec);
- // First, find dall of the repeated substrings in the tree of minimum length
+ // First, find all of the repeated substrings in the tree of minimum length
// 2.
std::vector<Candidate> CandidatesForRepeatedSeq;
for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) {
@@ -1087,10 +1092,8 @@ MachineOutliner::findCandidates(InstructionMapper &Mapper,
}
}
-MachineFunction *
-MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
- InstructionMapper &Mapper,
- unsigned Name) {
+MachineFunction *MachineOutliner::createOutlinedFunction(
+ Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) {
// Create the function name. This should be unique.
// FIXME: We should have a better naming scheme. This should be stable,
@@ -1190,13 +1193,11 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
bool MachineOutliner::outline(Module &M,
std::vector<OutlinedFunction> &FunctionList,
- InstructionMapper &Mapper) {
+ InstructionMapper &Mapper,
+ unsigned &OutlinedFunctionNum) {
bool OutlinedSomething = false;
- // Number to append to the current outlined function.
- unsigned OutlinedFunctionNum = 0;
-
// Sort by benefit. The most beneficial functions should be outlined first.
llvm::stable_sort(FunctionList, [](const OutlinedFunction &LHS,
const OutlinedFunction &RHS) {
@@ -1303,12 +1304,6 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
if (F.empty())
continue;
- // Disable outlining from noreturn functions right now. Noreturn requires
- // special handling for the case where what we are outlining could be a
- // tail call.
- if (F.hasFnAttribute(Attribute::NoReturn))
- continue;
-
// There's something in F. Check if it has a MachineFunction associated with
// it.
MachineFunction *MF = MMI.getMachineFunction(F);
@@ -1403,8 +1398,7 @@ void MachineOutliner::emitInstrCountChangedRemark(
MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
MORE.emit([&]() {
MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange",
- DiagnosticLocation(),
- &MF->front());
+ DiagnosticLocation(), &MF->front());
R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner")
<< ": Function: "
<< DiagnosticInfoOptimizationBase::Argument("Function", F.getName())
@@ -1427,6 +1421,15 @@ bool MachineOutliner::runOnModule(Module &M) {
if (M.empty())
return false;
+ // Number to append to the current outlined function.
+ unsigned OutlinedFunctionNum = 0;
+
+ if (!doOutline(M, OutlinedFunctionNum))
+ return false;
+ return true;
+}
+
+bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
// If the user passed -enable-machine-outliner=always or
@@ -1434,14 +1437,14 @@ bool MachineOutliner::runOnModule(Module &M) {
// Otherwise, if the target supports default outlining, it will run on all
// functions deemed by the target to be worth outlining from by default. Tell
// the user how the outliner is running.
- LLVM_DEBUG(
+ LLVM_DEBUG({
dbgs() << "Machine Outliner: Running on ";
if (RunOnAllFunctions)
dbgs() << "all functions";
else
dbgs() << "target-default functions";
- dbgs() << "\n"
- );
+ dbgs() << "\n";
+ });
// If the user specifies that they want to outline from linkonceodrs, set
// it here.
@@ -1470,7 +1473,8 @@ bool MachineOutliner::runOnModule(Module &M) {
initSizeRemarkInfo(M, MMI, FunctionToInstrCount);
// Outline each of the candidates and return true if something was outlined.
- bool OutlinedSomething = outline(M, FunctionList, Mapper);
+ bool OutlinedSomething =
+ outline(M, FunctionList, Mapper, OutlinedFunctionNum);
// If we outlined something, we definitely changed the MI count of the
// module. If we've asked for size remarks, then output them.
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 89c9f6093a97..ef22caa877c9 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1314,8 +1314,9 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
// Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
// SUnit to the container.
SmallVector<SUnit *, 8> UseSUs;
- for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
- for (auto &Dep : (*I)->Succs) {
+ // Do not use iterator based loop here as we are updating the container.
+ for (size_t Index = 0; Index < PHISUs.size(); ++Index) {
+ for (auto &Dep : PHISUs[Index]->Succs) {
if (Dep.getKind() != SDep::Data)
continue;
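
The comment above is the whole story: the loop body appends to PHISUs, and push_back may reallocate, invalidating any live iterator. Indexing with a bound re-read each iteration sidesteps that and still visits appended elements. A self-contained illustration of the idiom:

    #include <cstddef>
    #include <vector>

    // Safe growth during traversal: no iterators are held across
    // push_back, and Work.size() is re-evaluated every iteration.
    void expandWorklist(std::vector<int> &Work) {
      for (std::size_t I = 0; I < Work.size(); ++I)
        if (Work[I] % 2 == 0)
          Work.push_back(Work[I] / 2);
    }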
diff --git a/llvm/lib/CodeGen/MachinePostDominators.cpp b/llvm/lib/CodeGen/MachinePostDominators.cpp
index f4daff667e86..fb96d0efa4d4 100644
--- a/llvm/lib/CodeGen/MachinePostDominators.cpp
+++ b/llvm/lib/CodeGen/MachinePostDominators.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MachineRegionInfo.cpp b/llvm/lib/CodeGen/MachineRegionInfo.cpp
index 2961d456be0d..45cdcbfeab9f 100644
--- a/llvm/lib/CodeGen/MachineRegionInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegionInfo.cpp
@@ -11,6 +11,7 @@
#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index f0721ea3b76d..e42701b9c6ca 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -48,6 +48,7 @@
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -238,6 +239,7 @@ void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -402,7 +404,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
if (EnablePostRAMachineSched.getNumOccurrences()) {
if (!EnablePostRAMachineSched)
return false;
- } else if (!mf.getSubtarget().enablePostRAScheduler()) {
+ } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
return false;
}
@@ -412,6 +414,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MLI = &getAnalysis<MachineLoopInfo>();
PassConfig = &getAnalysis<TargetPassConfig>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
if (VerifyScheduling)
MF->verify(this, "Before post machine scheduling.");
@@ -1495,7 +1498,7 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
: BaseOp->getIndex() < RHS.BaseOp->getIndex();
if (Offset != RHS.Offset)
- return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
+ return Offset < RHS.Offset;
return SU->NodeNum < RHS.SU->NodeNum;
}
@@ -1570,6 +1573,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
SUnit *SUa = MemOpRecords[Idx].SU;
SUnit *SUb = MemOpRecords[Idx+1].SU;
+ if (SUa->NodeNum > SUb->NodeNum)
+ std::swap(SUa, SUb);
if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
*MemOpRecords[Idx + 1].BaseOp,
ClusterLength) &&
@@ -1595,10 +1600,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
/// Callback from DAG postProcessing to create cluster edges for loads.
void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
- // Map DAG NodeNum to store chain ID.
- DenseMap<unsigned, unsigned> StoreChainIDs;
- // Map each store chain to a set of dependent MemOps.
- SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
+ // Map DAG NodeNum to a set of dependent MemOps in store chain.
+ DenseMap<unsigned, SmallVector<SUnit *, 4>> StoreChains;
for (SUnit &SU : DAG->SUnits) {
if ((IsLoad && !SU.getInstr()->mayLoad()) ||
(!IsLoad && !SU.getInstr()->mayStore()))
@@ -1611,19 +1614,14 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
break;
}
}
- // Check if this chain-like pred has been seen
- // before. ChainPredID==MaxNodeID at the top of the schedule.
- unsigned NumChains = StoreChainDependents.size();
- std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
- StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
- if (Result.second)
- StoreChainDependents.resize(NumChains + 1);
- StoreChainDependents[Result.first->second].push_back(&SU);
+ // Insert the SU to corresponding store chain.
+ auto &Chain = StoreChains.FindAndConstruct(ChainPredID).second;
+ Chain.push_back(&SU);
}
// Iterate over the store chains.
- for (auto &SCD : StoreChainDependents)
- clusterNeighboringMemOps(SCD, DAG);
+ for (auto &SCD : StoreChains)
+ clusterNeighboringMemOps(SCD.second, DAG);
}
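
DenseMap::FindAndConstruct default-constructs the mapped vector on first lookup, so the old chain-ID map plus parallel vector-of-vectors collapses into one container. The same grouping idiom with the standard library's operator[]:

    #include <unordered_map>
    #include <vector>

    struct SUnitStub {
      unsigned NodeNum;
      unsigned ChainPredID;
    };

    // Bucket units by chain predecessor; operator[] creates the bucket on
    // first use, like DenseMap::FindAndConstruct in the patch.
    std::unordered_map<unsigned, std::vector<SUnitStub *>>
    groupByChain(std::vector<SUnitStub> &Units) {
      std::unordered_map<unsigned, std::vector<SUnitStub *>> Chains;
      for (SUnitStub &SU : Units)
        Chains[SU.ChainPredID].push_back(&SU);
      return Chains;
    }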
//===----------------------------------------------------------------------===//
@@ -2085,7 +2083,8 @@ getOtherResourceCount(unsigned &OtherCritIdx) {
return OtherCritCount;
}
-void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
+void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
+ unsigned Idx) {
assert(SU->getInstr() && "Scheduled SUnit must have instr");
#ifndef NDEBUG
@@ -2102,11 +2101,19 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
// Check for interlocks first. For the purpose of other heuristics, an
// instruction that cannot issue appears as if it's not in the ReadyQueue.
bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
- if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
- Available.size() >= ReadyListLimit)
- Pending.push(SU);
- else
+ bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
+ checkHazard(SU) || (Available.size() >= ReadyListLimit);
+
+ if (!HazardDetected) {
Available.push(SU);
+
+ if (InPQueue)
+ Pending.remove(Pending.begin() + Idx);
+ return;
+ }
+
+ if (!InPQueue)
+ Pending.push(SU);
}
/// Move the boundary of scheduled code by one cycle.
@@ -2346,26 +2353,21 @@ void SchedBoundary::releasePending() {
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
- bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
- for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
- SUnit *SU = *(Pending.begin()+i);
+ for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
+ SUnit *SU = *(Pending.begin() + I);
unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
- if (!IsBuffered && ReadyCycle > CurrCycle)
- continue;
-
- if (checkHazard(SU))
- continue;
-
if (Available.size() >= ReadyListLimit)
break;
- Available.push(SU);
- Pending.remove(Pending.begin()+i);
- --i; --e;
+ releaseNode(SU, ReadyCycle, true, I);
+ if (E != Pending.size()) {
+ --I;
+ --E;
+ }
}
CheckPending = false;
}
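
The hazard checks now live in releaseNode, which may erase the element under the caller's feet; the caller detects that by comparing the container size before and after and rewinding both the index and the cached bound. A simplified model of that protocol:

    #include <cstddef>
    #include <vector>

    // May or may not remove Pending[Idx]; removal models a successful
    // releaseNode() that moved the unit to the Available queue.
    static void releaseNode(std::vector<int> &Pending,
                            std::vector<int> &Available, std::size_t Idx) {
      bool Hazard = Pending[Idx] % 3 == 0; // stand-in for checkHazard()
      if (Hazard)
        return;                            // stays pending
      Available.push_back(Pending[Idx]);
      Pending.erase(Pending.begin() + Idx);
    }

    void releasePending(std::vector<int> &Pending,
                        std::vector<int> &Available) {
      for (std::size_t I = 0, E = Pending.size(); I < E; ++I) {
        releaseNode(Pending, Available, I);
        if (E != Pending.size()) { // an element was erased: rewind
          --I;
          --E;
        }
      }
    }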
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 27a2e7023f22..a4ba197b7a1d 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -15,6 +15,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -38,6 +40,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
@@ -105,6 +108,25 @@ namespace {
using AllSuccsCache =
std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
+ /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
+ /// post-dominated by another DBG_VALUE of the same variable location.
+ /// This is necessary to detect sequences such as:
+ /// %0 = someinst
+ /// DBG_VALUE %0, !123, !DIExpression()
+ /// %1 = anotherinst
+ /// DBG_VALUE %1, !123, !DIExpression()
+  /// Where if %0 were to sink, the DBG_VALUE should not sink with it, as that
+ /// would re-order assignments.
+ using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;
+
+ /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
+ /// debug instructions to sink.
+ SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;
+
+ /// Record of debug variables that have had their locations set in the
+ /// current block.
+ DenseSet<DebugVariable> SeenDbgVars;
+
public:
static char ID; // Pass identification
@@ -132,6 +154,7 @@ namespace {
private:
bool ProcessBlock(MachineBasicBlock &MBB);
+ void ProcessDbgInst(MachineInstr &MI);
bool isWorthBreakingCriticalEdge(MachineInstr &MI,
MachineBasicBlock *From,
MachineBasicBlock *To);
@@ -153,8 +176,14 @@ namespace {
MachineBasicBlock *To,
bool BreakPHIEdge);
bool SinkInstruction(MachineInstr &MI, bool &SawStore,
-
AllSuccsCache &AllSuccessors);
+
+  /// If we sink a COPY inst, some debug users of its destination may no
+ /// longer be dominated by the COPY, and will eventually be dropped.
+ /// This is easily rectified by forwarding the non-dominated debug uses
+ /// to the copy source.
+ void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
+ MachineBasicBlock *TargetBlock);
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB,
bool &BreakPHIEdge, bool &LocalUse) const;
@@ -367,8 +396,11 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
if (!ProcessedBegin)
--I;
- if (MI.isDebugInstr())
+ if (MI.isDebugInstr()) {
+ if (MI.isDebugValue())
+ ProcessDbgInst(MI);
continue;
+ }
bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
if (Joined) {
@@ -384,9 +416,29 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
// If we just processed the first instruction in the block, we're done.
} while (!ProcessedBegin);
+ SeenDbgUsers.clear();
+ SeenDbgVars.clear();
+
return MadeChange;
}
+void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
+ // When we see DBG_VALUEs for registers, record any vreg it reads, so that
+ // we know what to sink if the vreg def sinks.
+ assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");
+
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ bool SeenBefore = SeenDbgVars.count(Var) != 0;
+
+ MachineOperand &MO = MI.getOperand(0);
+ if (MO.isReg() && MO.getReg().isVirtual())
+ SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
+
+ // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
+ SeenDbgVars.insert(Var);
+}
+
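
Because ProcessBlock iterates the block bottom-up, "seen before" here really means a later DBG_VALUE of the same variable exists, which is exactly the re-ordering hazard the flag records. A simplified model using a plain (pointer, bool) pair in place of PointerIntPair:

    #include <map>
    #include <set>
    #include <string>
    #include <utility>
    #include <vector>

    struct DbgValueStub {
      std::string Var; // variable identity
      unsigned VReg;   // virtual register the location reads
    };

    using SeenDbgUser = std::pair<const DbgValueStub *, bool>;

    // Walk DBG_VALUEs bottom-up; the bool marks users whose variable is
    // re-assigned later in the block, so they must not be sunk verbatim.
    void processDbgInst(
        const DbgValueStub &DV,
        std::map<unsigned, std::vector<SeenDbgUser>> &SeenUsers,
        std::set<std::string> &SeenVars) {
      bool SeenBefore = SeenVars.count(DV.Var) != 0;
      SeenUsers[DV.VReg].push_back({&DV, SeenBefore});
      SeenVars.insert(DV.Var);
    }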
bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
MachineBasicBlock *From,
MachineBasicBlock *To) {
@@ -731,18 +783,60 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
MBP.LHS.getReg() == BaseOp->getReg();
}
-/// Sink an instruction and its associated debug instructions. If the debug
-/// instructions to be sunk are already known, they can be provided in DbgVals.
+/// If the sunk instruction is a copy, try to forward the copy instead of
+/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
+/// there's any subregister weirdness involved. Returns true if copy
+/// propagation occurred.
+static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) {
+ const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
+ const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();
+
+  // Take a copy of the DBG_VALUE's operand, then check whether it is
+  // something that can be copy-forwarded. If it isn't, return false; the
+  // caller will set the original location to undef.
+ MachineOperand DbgMO = DbgMI.getOperand(0);
+
+ const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
+ auto CopyOperands = TII.isCopyInstr(SinkInst);
+ if (!CopyOperands)
+ return false;
+ SrcMO = CopyOperands->Source;
+ DstMO = CopyOperands->Destination;
+
+ // Check validity of forwarding this copy.
+ bool PostRA = MRI.getNumVirtRegs() == 0;
+
+ // Trying to forward between physical and virtual registers is too hard.
+ if (DbgMO.getReg().isVirtual() != SrcMO->getReg().isVirtual())
+ return false;
+
+ // Only try virtual register copy-forwarding before regalloc, and physical
+ // register copy-forwarding after regalloc.
+ bool arePhysRegs = !DbgMO.getReg().isVirtual();
+ if (arePhysRegs != PostRA)
+ return false;
+
+ // Pre-regalloc, only forward if all subregisters agree (or there are no
+ // subregs at all). More analysis might recover some forwardable copies.
+ if (!PostRA && (DbgMO.getSubReg() != SrcMO->getSubReg() ||
+ DbgMO.getSubReg() != DstMO->getSubReg()))
+ return false;
+
+ // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
+ // of this copy. Only forward the copy if the DBG_VALUE operand exactly
+ // matches the copy destination.
+ if (PostRA && DbgMO.getReg() != DstMO->getReg())
+ return false;
+
+ DbgMI.getOperand(0).setReg(SrcMO->getReg());
+ DbgMI.getOperand(0).setSubReg(SrcMO->getSubReg());
+ return true;
+}
+
+/// Sink an instruction and its associated debug instructions.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
MachineBasicBlock::iterator InsertPos,
- SmallVectorImpl<MachineInstr *> *DbgVals = nullptr) {
- // If debug values are provided use those, otherwise call collectDebugValues.
- SmallVector<MachineInstr *, 2> DbgValuesToSink;
- if (DbgVals)
- DbgValuesToSink.insert(DbgValuesToSink.begin(),
- DbgVals->begin(), DbgVals->end());
- else
- MI.collectDebugValues(DbgValuesToSink);
+ SmallVectorImpl<MachineInstr *> &DbgValuesToSink) {
// If we cannot find a location to use (merge with), then we erase the debug
// location to prevent debug-info driven tools from potentially reporting
@@ -758,13 +852,19 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
++MachineBasicBlock::iterator(MI));
- // Move previously adjacent debug value instructions to the insert position.
+ // Sink a copy of debug users to the insert position. Mark the original
+ // DBG_VALUE location as 'undef', indicating that any earlier variable
+ // location should be terminated as we've optimised away the value at this
+ // point.
for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
DBE = DbgValuesToSink.end();
DBI != DBE; ++DBI) {
MachineInstr *DbgMI = *DBI;
- SuccToSinkTo.splice(InsertPos, ParentBlock, DbgMI,
- ++MachineBasicBlock::iterator(DbgMI));
+ MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(*DBI);
+ SuccToSinkTo.insert(InsertPos, NewDbgMI);
+
+ if (!attemptDebugCopyProp(MI, *DbgMI))
+ DbgMI->getOperand(0).setReg(0);
}
}
@@ -882,7 +982,36 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
++InsertPos;
- performSink(MI, *SuccToSinkTo, InsertPos);
+ // Collect debug users of any vreg that this inst defines.
+ SmallVector<MachineInstr *, 4> DbgUsersToSink;
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
+ continue;
+ if (!SeenDbgUsers.count(MO.getReg()))
+ continue;
+
+ // Sink any users that don't pass any other DBG_VALUEs for this variable.
+ auto &Users = SeenDbgUsers[MO.getReg()];
+ for (auto &User : Users) {
+ MachineInstr *DbgMI = User.getPointer();
+ if (User.getInt()) {
+ // This DBG_VALUE would re-order assignments. If we can't copy-propagate
+ // it, it can't be recovered. Set it undef.
+ if (!attemptDebugCopyProp(MI, *DbgMI))
+ DbgMI->getOperand(0).setReg(0);
+ } else {
+ DbgUsersToSink.push_back(DbgMI);
+ }
+ }
+ }
+
+ // After sinking, some debug users may not be dominated any more. If possible,
+ // copy-propagate their operands. As it's expensive, don't do this if there's
+ // no debuginfo in the program.
+ if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
+ SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo);
+
+ performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink);
// Conservatively, clear any kill flags, since it's possible that they are no
// longer correct.
@@ -897,6 +1026,41 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
return true;
}
+void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
+ MachineInstr &MI, MachineBasicBlock *TargetBlock) {
+ assert(MI.isCopy());
+ assert(MI.getOperand(1).isReg());
+
+ // Enumerate all users of vreg operands that are def'd. Skip those that will
+ // be sunk. For the rest, if they are not dominated by the block we will sink
+ // MI into, propagate the copy source to them.
+ SmallVector<MachineInstr *, 4> DbgDefUsers;
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
+ continue;
+ for (auto &User : MRI.use_instructions(MO.getReg())) {
+ if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent()))
+ continue;
+
+      // If it is in the same block, it will either sink or be use-before-def.
+ if (User.getParent() == MI.getParent())
+ continue;
+
+ assert(User.getOperand(0).isReg() &&
+ "DBG_VALUE user of vreg, but non reg operand?");
+ DbgDefUsers.push_back(&User);
+ }
+ }
+
+ // Point the users of this copy that are no longer dominated, at the source
+ // of the copy.
+ for (auto *User : DbgDefUsers) {
+ User->getOperand(0).setReg(MI.getOperand(1).getReg());
+ User->getOperand(0).setSubReg(MI.getOperand(1).getSubReg());
+ }
+}
+
//===----------------------------------------------------------------------===//
// This pass is not intended to be a replacement or a complete alternative
// for the pre-ra machine sink pass. It is only designed to sink COPY
@@ -1051,10 +1215,14 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
SuccBB->removeLiveIn(*S);
for (auto U : UsedOpsInCopy) {
- Register Reg = MI->getOperand(U).getReg();
- if (!SuccBB->isLiveIn(Reg))
- SuccBB->addLiveIn(Reg);
+ Register SrcReg = MI->getOperand(U).getReg();
+ LaneBitmask Mask;
+ for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) {
+ Mask |= (*S).second;
+ }
+ SuccBB->addLiveIn(SrcReg, Mask.any() ? Mask : LaneBitmask::getAll());
}
+ SuccBB->sortUniqueLiveIns();
}
static bool hasRegisterDependency(MachineInstr *MI,
@@ -1206,7 +1374,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// block.
clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
- performSink(*MI, *SuccBB, InsertPos, &DbgValsToSink);
+ performSink(*MI, *SuccBB, InsertPos, DbgValsToSink);
updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
Changed = true;
diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp
new file mode 100644
index 000000000000..aff67f9cfd55
--- /dev/null
+++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -0,0 +1,122 @@
+//===- MachineSizeOpts.cpp - code size optimization related code ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared machine IR code size optimization related
+// code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+
+using namespace llvm;
+
+extern cl::opt<bool> EnablePGSO;
+extern cl::opt<bool> PGSOLargeWorkingSetSizeOnly;
+extern cl::opt<bool> ForcePGSO;
+extern cl::opt<int> PgsoCutoffInstrProf;
+extern cl::opt<int> PgsoCutoffSampleProf;
+
+namespace machine_size_opts_detail {
+
+/// Like ProfileSummaryInfo::isColdBlock but for MachineBasicBlock.
+bool isColdBlock(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ auto Count = MBFI->getBlockProfileCount(MBB);
+ return Count && PSI->isColdCount(*Count);
+}
+
+/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock.
+static bool isHotBlockNthPercentile(int PercentileCutoff,
+ const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ auto Count = MBFI->getBlockProfileCount(MBB);
+ return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
+}
+
+/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for
+/// MachineFunction.
+bool isFunctionColdInCallGraph(
+ const MachineFunction *MF,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo &MBFI) {
+ if (auto FunctionCount = MF->getFunction().getEntryCount())
+ if (!PSI->isColdCount(FunctionCount.getCount()))
+ return false;
+ for (const auto &MBB : *MF)
+ if (!isColdBlock(&MBB, PSI, &MBFI))
+ return false;
+ return true;
+}
+
+/// Like ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile but for
+/// MachineFunction.
+bool isFunctionHotInCallGraphNthPercentile(
+ int PercentileCutoff,
+ const MachineFunction *MF,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo &MBFI) {
+ if (auto FunctionCount = MF->getFunction().getEntryCount())
+ if (PSI->isHotCountNthPercentile(PercentileCutoff,
+ FunctionCount.getCount()))
+ return true;
+ for (const auto &MBB : *MF)
+ if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
+ return true;
+ return false;
+}
+} // namespace machine_size_opts_detail
+
+namespace {
+struct MachineBasicBlockBFIAdapter {
+ static bool isFunctionColdInCallGraph(const MachineFunction *MF,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo &MBFI) {
+ return machine_size_opts_detail::isFunctionColdInCallGraph(MF, PSI, MBFI);
+ }
+ static bool isFunctionHotInCallGraphNthPercentile(
+ int CutOff,
+ const MachineFunction *MF,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo &MBFI) {
+ return machine_size_opts_detail::isFunctionHotInCallGraphNthPercentile(
+ CutOff, MF, PSI, MBFI);
+ }
+ static bool isColdBlock(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI);
+ }
+ static bool isHotBlockNthPercentile(int CutOff,
+ const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return machine_size_opts_detail::isHotBlockNthPercentile(
+ CutOff, MBB, PSI, MBFI);
+ }
+};
+} // end anonymous namespace
+
+bool llvm::shouldOptimizeForSize(const MachineFunction *MF,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI,
+ PGSOQueryType QueryType) {
+ return shouldFuncOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
+ MF, PSI, MBFI, QueryType);
+}
+
+bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI,
+ PGSOQueryType QueryType) {
+ return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
+ MBB, PSI, MBFI, QueryType);
+}
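
A hedged sketch of a caller; the pass name and the way PSI/MBFI are obtained are assumptions for illustration, not part of this diff:

    // Hypothetical machine pass gating a size-saving transform on PGSO.
    // Assumes ProfileSummaryInfoWrapperPass and MachineBlockFrequencyInfo
    // were requested in getAnalysisUsage().
    bool SomeMachinePass::shouldShrink(const MachineBasicBlock &MBB) {
      ProfileSummaryInfo *PSI =
          &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
      const MachineBlockFrequencyInfo *MBFI =
          &getAnalysis<MachineBlockFrequencyInfo>();
      return llvm::shouldOptimizeForSize(&MBB, PSI, MBFI,
                                         PGSOQueryType::Other);
    }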
diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 66a3bc2f8cc4..e6b51b7e1e56 100644
--- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 969743edca52..6c0402df8489 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -59,6 +59,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -123,8 +124,8 @@ namespace {
void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
RV.push_back(Reg);
if (Register::isPhysicalRegister(Reg))
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- RV.push_back(*SubRegs);
+ for (const MCPhysReg &SubReg : TRI->subregs(Reg))
+ RV.push_back(SubReg);
}
struct BBInfo {
@@ -801,18 +802,16 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB live-in list contains non-physical register", MBB);
continue;
}
- for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- regsLive.insert(*SubRegs);
+ for (const MCPhysReg &SubReg : TRI->subregs_inclusive(LI.PhysReg))
+ regsLive.insert(SubReg);
}
}
const MachineFrameInfo &MFI = MF->getFrameInfo();
BitVector PR = MFI.getPristineRegs(*MF);
for (unsigned I : PR.set_bits()) {
- for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- regsLive.insert(*SubRegs);
+ for (const MCPhysReg &SubReg : TRI->subregs_inclusive(I))
+ regsLive.insert(SubReg);
}
regsKilled.clear();
@@ -1100,7 +1099,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
- case TargetOpcode::G_GEP: {
+ case TargetOpcode::G_PTR_ADD: {
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
LLT PtrTy = MRI->getType(MI->getOperand(1).getReg());
LLT OffsetTy = MRI->getType(MI->getOperand(2).getReg());
@@ -1408,18 +1407,6 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
- const Constant *Mask = MaskOp.getShuffleMask();
- auto *MaskVT = dyn_cast<VectorType>(Mask->getType());
- if (!MaskVT || !MaskVT->getElementType()->isIntegerTy(32)) {
- report("Invalid shufflemask constant type", MI);
- break;
- }
-
- if (!Mask->getAggregateElement(0u)) {
- report("Invalid shufflemask constant type", MI);
- break;
- }
-
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
LLT Src0Ty = MRI->getType(MI->getOperand(1).getReg());
LLT Src1Ty = MRI->getType(MI->getOperand(2).getReg());
@@ -1435,8 +1422,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1;
int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
- SmallVector<int, 32> MaskIdxes;
- ShuffleVectorInst::getShuffleMask(Mask, MaskIdxes);
+ ArrayRef<int> MaskIdxes = MaskOp.getShuffleMask();
if (static_cast<int>(MaskIdxes.size()) != DstNumElts)
report("Wrong result type for shufflemask", MI);
@@ -1609,13 +1595,23 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
} else if (MONum < MCID.getNumOperands()) {
const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// Don't check if it's the last operand in a variadic instruction. See,
- // e.g., LDM_RET in the arm back end.
- if (MO->isReg() &&
- !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) {
- if (MO->isDef() && !MCOI.isOptionalDef())
- report("Explicit operand marked as def", MO, MONum);
- if (MO->isImplicit())
- report("Explicit operand marked as implicit", MO, MONum);
+ // e.g., LDM_RET in the arm back end. Check non-variadic operands only.
+ bool IsOptional = MI->isVariadic() && MONum == MCID.getNumOperands() - 1;
+ if (!IsOptional) {
+ if (MO->isReg()) {
+ if (MO->isDef() && !MCOI.isOptionalDef())
+ report("Explicit operand marked as def", MO, MONum);
+ if (MO->isImplicit())
+ report("Explicit operand marked as implicit", MO, MONum);
+ }
+
+ // Check that an instruction has register operands only as expected.
+ if (MCOI.OperandType == MCOI::OPERAND_REGISTER &&
+ !MO->isReg() && !MO->isFI())
+ report("Expected a register operand.", MO, MONum);
+ if ((MCOI.OperandType == MCOI::OPERAND_IMMEDIATE ||
+ MCOI.OperandType == MCOI::OPERAND_PCREL) && MO->isReg())
+ report("Expected a non-register operand.", MO, MONum);
}
int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO);
@@ -2005,9 +2001,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
bool Bad = !isReserved(Reg);
// We are fine if just any subregister has a defined value.
if (Bad) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid();
- ++SubRegs) {
- if (regsLive.count(*SubRegs)) {
+
+ for (const MCPhysReg &SubReg : TRI->subregs(Reg)) {
+ if (regsLive.count(SubReg)) {
Bad = false;
break;
}
@@ -2025,9 +2021,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (!Register::isPhysicalRegister(MOP.getReg()))
continue;
- for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid();
- ++SubRegs) {
- if (*SubRegs == Reg) {
+ for (const MCPhysReg &SubReg : TRI->subregs(MOP.getReg())) {
+ if (SubReg == Reg) {
Bad = false;
break;
}
@@ -2304,6 +2299,32 @@ void MachineVerifier::visitMachineFunctionAfter() {
if (LiveInts)
verifyLiveIntervals();
+ // Check live-in list of each MBB. If a register is live into MBB, check
+ // that the register is in regsLiveOut of each predecessor block. Since
+  // this must come from a definition in the predecessor or its live-in
+ // list, this will catch a live-through case where the predecessor does not
+ // have the register in its live-in list. This currently only checks
+ // registers that have no aliases, are not allocatable and are not
+ // reserved, which could mean a condition code register for instance.
+ if (MRI->tracksLiveness())
+ for (const auto &MBB : *MF)
+ for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) {
+ MCPhysReg LiveInReg = P.PhysReg;
+ bool hasAliases = MCRegAliasIterator(LiveInReg, TRI, false).isValid();
+ if (hasAliases || isAllocatable(LiveInReg) || isReserved(LiveInReg))
+ continue;
+ for (const MachineBasicBlock *Pred : MBB.predecessors()) {
+ BBInfo &PInfo = MBBInfoMap[Pred];
+ if (!PInfo.regsLiveOut.count(LiveInReg)) {
+ report("Live in register not found to be live out from predecessor.",
+ &MBB);
+ errs() << TRI->getName(LiveInReg)
+ << " not found to be live out from "
+ << printMBBReference(*Pred) << "\n";
+ }
+ }
+ }
+
for (auto CSInfo : MF->getCallSitesInfo())
if (!CSInfo.first->isCall())
report("Call site info referencing instruction that is not call", MF);
diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp
index d21eae222af0..d2ee21c8720f 100644
--- a/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/llvm/lib/CodeGen/MacroFusion.cpp
@@ -36,6 +36,21 @@ static bool isHazard(const SDep &Dep) {
return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
}
+static SUnit *getPredClusterSU(const SUnit &SU) {
+ for (const SDep &SI : SU.Preds)
+ if (SI.isCluster())
+ return SI.getSUnit();
+
+ return nullptr;
+}
+
+static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
+ unsigned Num = 1;
+ const SUnit *CurrentSU = &SU;
+  while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit)
+    Num++;
+ return Num < FuseLimit;
+}
+
static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
SUnit &SecondSU) {
// Check that neither instr is already paired with another along the edge
@@ -56,6 +71,14 @@ static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
if (!DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster)))
return false;
+ // TODO - If we want to chain more than two instructions, we need to create
+  // artificial edges to make dependencies from the FirstSU also dependent
+ // on other chained instructions, and other chained instructions also
+ // dependent on the dependencies of the SecondSU, to prevent them from being
+ // scheduled into these chained instructions.
+ assert(hasLessThanNumFused(FirstSU, 2) &&
+ "Currently we only support chaining together two instructions");
+
// Adjust the latency between both instrs.
for (SDep &SI : FirstSU.Succs)
if (SI.getSUnit() == &SecondSU)
@@ -161,8 +184,10 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU)
if (DepSU.isBoundaryNode())
continue;
+    // Chain at most two instructions together.
const MachineInstr *DepMI = DepSU.getInstr();
- if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
+ if (!hasLessThanNumFused(DepSU, 2) ||
+ !shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
continue;
if (fuseInstructionPair(DAG, DepSU, AnchorSU))
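
Each cluster edge links at most one predecessor, so counting the fused run is a walk up that chain. A standalone model of hasLessThanNumFused with the loop written out iteratively:

    struct Unit {
      Unit *ClusterPred = nullptr; // single cluster-edge predecessor, if any
    };

    // Count units already chained through cluster edges, stopping once the
    // limit is reached; true means another instruction may still be fused.
    bool hasLessThanNumFused(const Unit &SU, unsigned FuseLimit) {
      unsigned Num = 1;
      for (const Unit *Cur = SU.ClusterPred; Cur && Num < FuseLimit;
           Cur = Cur->ClusterPred)
        ++Num;
      return Num < FuseLimit;
    }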
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 7ce3c5861801..163e52d9199d 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/MachineLoopUtils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -1189,7 +1190,7 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
if (!Phi.isPHI())
return false;
- unsigned DefCycle = Schedule.getCycle(&Phi);
+ int DefCycle = Schedule.getCycle(&Phi);
int DefStage = Schedule.getStage(&Phi);
unsigned InitVal = 0;
@@ -1198,7 +1199,7 @@ bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
MachineInstr *Use = MRI.getVRegDef(LoopVal);
if (!Use || Use->isPHI())
return true;
- unsigned LoopCycle = Schedule.getCycle(Use);
+ int LoopCycle = Schedule.getCycle(Use);
int LoopStage = Schedule.getStage(Use);
return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
}
@@ -1214,7 +1215,7 @@ namespace {
// Remove any dead phis in MBB. Dead phis either have only one block as input
// (in which case they are the identity) or have no uses.
void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
- LiveIntervals *LIS) {
+ LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
bool Changed = true;
while (Changed) {
Changed = false;
@@ -1226,7 +1227,7 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
LIS->RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();
Changed = true;
- } else if (MI.getNumExplicitOperands() == 3) {
+ } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
MRI.constrainRegClass(MI.getOperand(1).getReg(),
MRI.getRegClass(MI.getOperand(0).getReg()));
MRI.replaceRegWith(MI.getOperand(0).getReg(),
@@ -1582,6 +1583,133 @@ PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
return NewBB;
}
+void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
+ int MinStage) {
+ for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
+ I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
+ MachineInstr *MI = &*I++;
+ int Stage = getStage(MI);
+ if (Stage == -1 || Stage >= MinStage)
+ continue;
+
+ for (MachineOperand &DefMO : MI->defs()) {
+ SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
+ for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
+ // Only PHIs can use values from this block by construction.
+ // Match with the equivalent PHI in B.
+ assert(UseMI.isPHI());
+ Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
+ MI->getParent());
+ Subs.emplace_back(&UseMI, Reg);
+ }
+ for (auto &Sub : Subs)
+ Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
+ *MRI.getTargetRegisterInfo());
+ }
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+}
+
+void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
+ MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
+ auto InsertPt = DestBB->getFirstNonPHI();
+ DenseMap<Register, Register> Remaps;
+ for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) {
+ MachineInstr *MI = &*I++;
+ if (MI->isPHI()) {
+      // This is an illegal PHI. If we move any instructions using an illegal
+      // PHI, we need to create a legal PHI in the destination block.
+ Register PhiR = MI->getOperand(0).getReg();
+ auto RC = MRI.getRegClass(PhiR);
+ Register NR = MRI.createVirtualRegister(RC);
+ MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NR)
+ .addReg(PhiR)
+ .addMBB(SourceBB);
+ BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI;
+ CanonicalMIs[NI] = CanonicalMIs[MI];
+ Remaps[PhiR] = NR;
+ continue;
+ }
+ if (getStage(MI) != Stage)
+ continue;
+ MI->removeFromParent();
+ DestBB->insert(InsertPt, MI);
+ auto *KernelMI = CanonicalMIs[MI];
+ BlockMIs[{DestBB, KernelMI}] = MI;
+ BlockMIs.erase({SourceBB, KernelMI});
+ }
+ SmallVector<MachineInstr *, 4> PhiToDelete;
+ for (MachineInstr &MI : DestBB->phis()) {
+ assert(MI.getNumOperands() == 3);
+ MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
+    // If the instruction referenced by the phi is moved inside the block,
+    // we don't need the phi anymore.
+ if (getStage(Def) == Stage) {
+ Register PhiReg = MI.getOperand(0).getReg();
+ MRI.replaceRegWith(MI.getOperand(0).getReg(),
+ Def->getOperand(0).getReg());
+ MI.getOperand(0).setReg(PhiReg);
+ PhiToDelete.push_back(&MI);
+ }
+ }
+ for (auto *P : PhiToDelete)
+ P->eraseFromParent();
+ InsertPt = DestBB->getFirstNonPHI();
+ // Helper to clone Phi instructions into the destination block. We clone Phi
+ // greedily to avoid combinatorial explosion of Phi instructions.
+ auto clonePhi = [&](MachineInstr *Phi) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
+ DestBB->insert(InsertPt, NewMI);
+ Register OrigR = Phi->getOperand(0).getReg();
+ Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
+ NewMI->getOperand(0).setReg(R);
+ NewMI->getOperand(1).setReg(OrigR);
+ NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
+ Remaps[OrigR] = R;
+ CanonicalMIs[NewMI] = CanonicalMIs[Phi];
+ BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
+ PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi];
+ return R;
+ };
+ for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
+ for (MachineOperand &MO : I->uses()) {
+ if (!MO.isReg())
+ continue;
+ if (Remaps.count(MO.getReg()))
+ MO.setReg(Remaps[MO.getReg()]);
+ else {
+        // If we are using a phi from the source block, we need to add a new
+        // phi pointing to the old one.
+ MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
+ if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
+ Register R = clonePhi(Use);
+ MO.setReg(R);
+ }
+ }
+ }
+ }
+}
+
+Register
+PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
+ MachineInstr *Phi) {
+ unsigned distance = PhiNodeLoopIteration[Phi];
+ MachineInstr *CanonicalUse = CanonicalPhi;
+ for (unsigned I = 0; I < distance; ++I) {
+ assert(CanonicalUse->isPHI());
+ assert(CanonicalUse->getNumOperands() == 5);
+ unsigned LoopRegIdx = 3, InitRegIdx = 1;
+ if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
+ std::swap(LoopRegIdx, InitRegIdx);
+ CanonicalUse =
+ MRI.getVRegDef(CanonicalUse->getOperand(LoopRegIdx).getReg());
+ }
+ return CanonicalUse->getOperand(0).getReg();
+}
+
void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
BitVector LS(Schedule.getNumStages(), true);
BitVector AS(Schedule.getNumStages(), true);
@@ -1604,26 +1732,45 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
   // property that any value defined in BB but used outside of BB is used by a
// PHI in the exiting block.
MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
-
+ EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
// Push out the epilogs, again in reverse order.
   // We can't assume anything about the minimum loop trip count at this point,
- // so emit a fairly complex epilog:
- // K[0, 1, 2] // Kernel runs stages 0, 1, 2
- // E0[2] <- P1 // Epilog runs stage 2 only, so the state after is [0].
- // E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2.
- //
- // This creates a single-successor single-predecessor sequence of blocks for
- // each epilog, which are kept this way for simplicity at this stage and
- // cleaned up by the optimizer later.
+ // so emit a fairly complex epilog.
+
+ // We first peel number of stages minus one epilogue. Then we remove dead
+ // stages and reorder instructions based on their stage. If we have 3 stages
+ // we generate first:
+ // E0[3, 2, 1]
+ // E1[3', 2']
+ // E2[3'']
+ // And then we move instructions based on their stages to have:
+ // E0[3]
+ // E1[2, 3']
+ // E2[1, 2', 3'']
+ // The transformation is legal because we only move instructions past
+ // instructions of a previous loop iteration.
for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
- Epilogs.push_back(nullptr);
- for (int J = Schedule.getNumStages() - 1; J >= I; --J) {
- LS.reset();
- LS[J] = 1;
- Epilogs.back() = peelKernel(LPD_Back);
- LiveStages[Epilogs.back()] = LS;
- AvailableStages[Epilogs.back()] = AS;
+ Epilogs.push_back(peelKernel(LPD_Back));
+ MachineBasicBlock *B = Epilogs.back();
+ filterInstructions(B, Schedule.getNumStages() - I);
+    // Keep track of which iteration each phi belongs to. We need it to know
+ // what version of the variable to use during prologue/epilogue stitching.
+ EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
+ for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi)
+ PhiNodeLoopIteration[&*Phi] = Schedule.getNumStages() - I;
+ }
+ for (size_t I = 0; I < Epilogs.size(); I++) {
+ LS.reset();
+ for (size_t J = I; J < Epilogs.size(); J++) {
+ int Iteration = J;
+ unsigned Stage = Schedule.getNumStages() - 1 + I - J;
+ // Move stage one block at a time so that Phi nodes are updated correctly.
+ for (size_t K = Iteration; K > I; K--)
+ moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
+ LS[Stage] = 1;
}
+ LiveStages[Epilogs[I]] = LS;
+ AvailableStages[Epilogs[I]] = AS;
}
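
A hypothetical dry run of the loop above (standalone, assuming NumStages == 4,
i.e. three epilogs and stages 0-3 with stage 0 completing inside the kernel).
Per the "reverse order" note earlier, Epilogs[0] is the block furthest from
the kernel, so the printed sets correspond to the comment's E2, E1, E0:

  #include <cstdio>
  int main() {
    const int NumStages = 4, NumEpilogs = NumStages - 1;
    for (int I = 0; I < NumEpilogs; ++I) {
      std::printf("Epilogs[%d] live stages:", I);
      for (int J = I; J < NumEpilogs; ++J)
        std::printf(" %d", NumStages - 1 + I - J);
      std::printf("\n");
    }
    // Prints {3,2,1}, {3,2}, {3}: the E2[1,2',3''] / E1[2,3'] / E0[3]
    // layout from the comment, read back to front.
  }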
// Now we've defined all the prolog and epilog blocks as a fallthrough
@@ -1638,8 +1785,16 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
for (MachineInstr &MI : (*EI)->phis()) {
Register Reg = MI.getOperand(1).getReg();
MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
- if (Use && Use->getParent() == Pred)
+ if (Use && Use->getParent() == Pred) {
+ MachineInstr *CanonicalUse = CanonicalMIs[Use];
+ if (CanonicalUse->isPHI()) {
+          // If the use comes from a phi, we need to skip as many phis as the
+          // distance between the epilogue and the kernel. Trace through the phi
+ // chain to find the right value.
+ Reg = getPhiCanonicalReg(CanonicalUse, Use);
+ }
Reg = getEquivalentRegisterIn(Reg, *PI);
+ }
MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
MI.addOperand(MachineOperand::CreateMBB(*PI));
}
@@ -1659,6 +1814,13 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
rewriteUsesOf(MI);
}
}
+ for (auto *MI : IllegalPhisToDelete) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+ IllegalPhisToDelete.clear();
+
// Now all remapping has been done, we're free to optimize the generated code.
for (MachineBasicBlock *B : reverse(Blocks))
EliminateDeadPhis(B, MRI, LIS);
@@ -1727,9 +1889,10 @@ void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
R = MI->getOperand(1).getReg();
MRI.setRegClass(R, MRI.getRegClass(PhiR));
MRI.replaceRegWith(PhiR, R);
- if (LIS)
- LIS->RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
+ // Postpone deleting the Phi as it may be referenced by BlockMIs and used
+ // later to figure out how to remap registers.
+ MI->getOperand(0).setReg(PhiR);
+ IllegalPhisToDelete.push_back(MI);
return;
}
@@ -1759,10 +1922,6 @@ void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
}
void PeelingModuloScheduleExpander::fixupBranches() {
- std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info =
- TII->analyzeLoopForPipelining(BB);
- assert(Info);
-
// Work outwards from the kernel.
bool KernelDisposed = false;
int TC = Schedule.getNumStages() - 1;
@@ -1818,6 +1977,8 @@ void PeelingModuloScheduleExpander::expand() {
BB = Schedule.getLoop()->getTopBlock();
Preheader = Schedule.getLoop()->getLoopPreheader();
LLVM_DEBUG(Schedule.dump());
+ Info = TII->analyzeLoopForPipelining(BB);
+ assert(Info);
rewriteKernel();
peelPrologAndEpilogs();
diff --git a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
new file mode 100644
index 000000000000..9ed3471c0fc9
--- /dev/null
+++ b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
@@ -0,0 +1,54 @@
+//===-- NonRelocatableStringpool.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/NonRelocatableStringpool.h"
+
+namespace llvm {
+
+DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) {
+ if (S.empty() && !Strings.empty())
+ return EmptyString;
+
+ if (Translator)
+ S = Translator(S);
+ auto I = Strings.insert({S, DwarfStringPoolEntry()});
+ auto &Entry = I.first->second;
+ if (I.second || !Entry.isIndexed()) {
+ Entry.Index = NumEntries++;
+ Entry.Offset = CurrentEndOffset;
+ Entry.Symbol = nullptr;
+ CurrentEndOffset += S.size() + 1;
+ }
+ return DwarfStringPoolEntryRef(*I.first, true);
+}
+
+StringRef NonRelocatableStringpool::internString(StringRef S) {
+ DwarfStringPoolEntry Entry{nullptr, 0, DwarfStringPoolEntry::NotIndexed};
+
+ if (Translator)
+ S = Translator(S);
+
+ auto InsertResult = Strings.insert({S, Entry});
+ return InsertResult.first->getKey();
+}
+
+std::vector<DwarfStringPoolEntryRef>
+NonRelocatableStringpool::getEntriesForEmission() const {
+ std::vector<DwarfStringPoolEntryRef> Result;
+ Result.reserve(Strings.size());
+ for (const auto &E : Strings)
+ if (E.getValue().isIndexed())
+ Result.emplace_back(E, true);
+ llvm::sort(Result, [](const DwarfStringPoolEntryRef A,
+ const DwarfStringPoolEntryRef B) {
+ return A.getIndex() < B.getIndex();
+ });
+ return Result;
+}
+
+} // namespace llvm
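
A hedged usage sketch of the new pool (offsets follow first-insertion order;
API names as in the header this file includes):

  #include "llvm/CodeGen/NonRelocatableStringpool.h"
  #include <cassert>

  void demo() {
    llvm::NonRelocatableStringpool Pool;
    auto A = Pool.getEntry("main"); // Index 0, Offset 0
    auto B = Pool.getEntry("main"); // deduplicated: same entry as A
    auto C = Pool.getEntry("f");    // Index 1, Offset 5 ("main" plus NUL)
    assert(A.getOffset() == B.getOffset());
    assert(Pool.getEntriesForEmission().size() == 2); // sorted by Index
    (void)C;
  }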
diff --git a/llvm/lib/CodeGen/OptimizePHIs.cpp b/llvm/lib/CodeGen/OptimizePHIs.cpp
index 1a493964e678..02a70ab801e9 100644
--- a/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include <cassert>
diff --git a/llvm/lib/CodeGen/ParallelCG.cpp b/llvm/lib/CodeGen/ParallelCG.cpp
index e4c73658cb4f..7dbd830666fb 100644
--- a/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/llvm/lib/CodeGen/ParallelCG.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
static void codegen(Module *M, llvm::raw_pwrite_stream &OS,
function_ref<std::unique_ptr<TargetMachine>()> TMFactory,
- TargetMachine::CodeGenFileType FileType) {
+ CodeGenFileType FileType) {
std::unique_ptr<TargetMachine> TM = TMFactory();
legacy::PassManager CodeGenPasses;
if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType))
@@ -38,7 +38,7 @@ std::unique_ptr<Module> llvm::splitCodeGen(
std::unique_ptr<Module> M, ArrayRef<llvm::raw_pwrite_stream *> OSs,
ArrayRef<llvm::raw_pwrite_stream *> BCOSs,
const std::function<std::unique_ptr<TargetMachine>()> &TMFactory,
- TargetMachine::CodeGenFileType FileType, bool PreserveLocals) {
+ CodeGenFileType FileType, bool PreserveLocals) {
assert(BCOSs.empty() || BCOSs.size() == OSs.size());
if (OSs.size() == 1) {
diff --git a/llvm/lib/CodeGen/PatchableFunction.cpp b/llvm/lib/CodeGen/PatchableFunction.cpp
index 529fde84e39a..1d6069c50554 100644
--- a/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
@@ -54,6 +55,15 @@ static bool doesNotGeneratecode(const MachineInstr &MI) {
}
bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
+ if (MF.getFunction().hasFnAttribute("patchable-function-entry")) {
+ MachineBasicBlock &FirstMBB = *MF.begin();
+ MachineInstr &FirstMI = *FirstMBB.begin();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
+ TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
+ return true;
+ }
+
if (!MF.getFunction().hasFnAttribute("patchable-function"))
return false;
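
The attribute string checked above is what the frontend attaches at the
function level; a hypothetical use (assumed clang spelling, with the NOP
count consumed elsewhere in the backend):

  // Marks the function so the pass above prepends a
  // PATCHABLE_FUNCTION_ENTER, later expanded into patchable NOPs.
  __attribute__((patchable_function_entry(2)))
  void hook_me(void) {}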
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 54f1d38ed106..c9c279cf0ddf 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -84,6 +84,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
index 0a3838617bc5..4f88f4d3dd6a 100644
--- a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 5bea9f2893c9..d68959935cec 100644
--- a/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -38,6 +38,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -77,7 +78,7 @@ AntiDepBreaker::~AntiDepBreaker() { }
namespace {
class PostRAScheduler : public MachineFunctionPass {
- const TargetInstrInfo *TII;
+ const TargetInstrInfo *TII = nullptr;
RegisterClassInfo RegClassInfo;
public:
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 0d2f6f99ca96..1ff4e7cbd8fb 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -12,14 +12,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
+#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -56,6 +58,17 @@ static bool lowerLoadRelative(Function &F) {
return Changed;
}
+// ObjCARC has knowledge about whether an obj-c runtime function needs to be
+// always tail-called or never tail-called.
+static CallInst::TailCallKind getOverridingTailCallKind(const Function &F) {
+ objcarc::ARCInstKind Kind = objcarc::GetFunctionClass(&F);
+ if (objcarc::IsAlwaysTail(Kind))
+ return CallInst::TCK_Tail;
+ else if (objcarc::IsNeverTail(Kind))
+ return CallInst::TCK_NoTail;
+ return CallInst::TCK_None;
+}
+
static bool lowerObjCCall(Function &F, const char *NewFn,
bool setNonLazyBind = false) {
if (F.use_empty())
@@ -75,6 +88,8 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
}
}
+ CallInst::TailCallKind OverridingTCK = getOverridingTailCallKind(F);
+
for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
auto *CI = cast<CallInst>(I->getUser());
assert(CI->getCalledFunction() && "Cannot lower an indirect call!");
@@ -84,7 +99,17 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
CallInst *NewCI = Builder.CreateCall(FCache, Args);
NewCI->setName(CI->getName());
- NewCI->setTailCallKind(CI->getTailCallKind());
+
+ // Try to set the most appropriate TailCallKind based on both the current
+ // attributes and the ones that we could get from ObjCARC's special
+ // knowledge of the runtime functions.
+ //
+ // std::max respects both requirements of notail and tail here:
+ // * notail on either the call or from ObjCARC becomes notail
+ // * tail on either side is stronger than none, but not notail
+ CallInst::TailCallKind TCK = CI->getTailCallKind();
+ NewCI->setTailCallKind(std::max(TCK, OverridingTCK));
+
if (!CI->use_empty())
CI->replaceAllUsesWith(NewCI);
CI->eraseFromParent();
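
A standalone illustration of the std::max merge described in the comment
(simplified enum; the real TailCallKind also has TCK_MustTail between
TCK_Tail and TCK_NoTail, but the ordering property used here is the same):

  #include <algorithm>
  #include <cassert>

  enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_NoTail = 2 };

  int main() {
    assert(std::max(TCK_None, TCK_Tail) == TCK_Tail);     // tail beats none
    assert(std::max(TCK_Tail, TCK_NoTail) == TCK_NoTail); // notail wins
    assert(std::max(TCK_None, TCK_None) == TCK_None);     // no override
  }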
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 11bff45f9ad5..ed19f7448151 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 729f06dda62b..3909b5717281 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -51,6 +51,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index 2850033e6419..3c1f9905afd0 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -132,8 +133,6 @@ void ReachingDefAnalysis::processBasicBlock(
}
bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) {
- if (skipFunction(mf.getFunction()))
- return false;
MF = &mf;
TRI = MF->getSubtarget().getRegisterInfo();
@@ -189,7 +188,145 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) {
return LatestDef;
}
+MachineInstr *ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI,
+                                                    int PhysReg) {
+ return getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg));
+}
+
+bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B,
+ int PhysReg) {
+ MachineBasicBlock *ParentA = A->getParent();
+ MachineBasicBlock *ParentB = B->getParent();
+ if (ParentA != ParentB)
+ return false;
+
+ return getReachingDef(A, PhysReg) == getReachingDef(B, PhysReg);
+}
+
+MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB,
+ int InstId) {
+ assert(static_cast<size_t>(MBB->getNumber()) < MBBReachingDefs.size() &&
+ "Unexpected basic block number.");
+ assert(InstId < static_cast<int>(MBB->size()) &&
+ "Unexpected instruction id.");
+
+ if (InstId < 0)
+ return nullptr;
+
+ for (auto &MI : *MBB) {
+ if (InstIds.count(&MI) && InstIds[&MI] == InstId)
+ return &MI;
+ }
+ return nullptr;
+}
+
int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) {
   assert(InstIds.count(MI) && "Unexpected machine instruction.");
return InstIds[MI] - getReachingDef(MI, PhysReg);
}
+
+void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg,
+ SmallVectorImpl<MachineInstr*> &Uses) {
+ MachineBasicBlock *MBB = Def->getParent();
+ MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def);
+ while (++MI != MBB->end()) {
+ // If/when we find a new reaching def, we know that there's no more uses
+ // of 'Def'.
+ if (getReachingMIDef(&*MI, PhysReg) != Def)
+ return;
+
+ for (auto &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != PhysReg)
+ continue;
+
+ Uses.push_back(&*MI);
+ if (MO.isKill())
+ return;
+ }
+ }
+}
+
+unsigned ReachingDefAnalysis::getNumUses(MachineInstr *Def, int PhysReg) {
+ SmallVector<MachineInstr*, 4> Uses;
+ getReachingLocalUses(Def, PhysReg, Uses);
+ return Uses.size();
+}
+
+bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) {
+ MachineBasicBlock *MBB = MI->getParent();
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(*MBB);
+
+ // Yes if the register is live out of the basic block.
+ if (LiveRegs.contains(PhysReg))
+ return true;
+
+ // Walk backwards through the block to see if the register is live at some
+ // point.
+ for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) {
+ LiveRegs.stepBackward(*Last);
+ if (LiveRegs.contains(PhysReg))
+ return InstIds[&*Last] > InstIds[MI];
+ }
+ return false;
+}
+
+bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) {
+ MachineBasicBlock *MBB = MI->getParent();
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(*MBB);
+ if (!LiveRegs.contains(PhysReg))
+ return false;
+
+ MachineInstr *Last = &MBB->back();
+ int Def = getReachingDef(MI, PhysReg);
+ if (getReachingDef(Last, PhysReg) != Def)
+ return false;
+
+ // Finally check that the last instruction doesn't redefine the register.
+ for (auto &MO : Last->operands())
+ if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg)
+ return false;
+
+ return true;
+}
+
+MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
+ int PhysReg) {
+ LivePhysRegs LiveRegs(*TRI);
+ LiveRegs.addLiveOuts(*MBB);
+ if (!LiveRegs.contains(PhysReg))
+ return nullptr;
+
+ MachineInstr *Last = &MBB->back();
+ int Def = getReachingDef(Last, PhysReg);
+ for (auto &MO : Last->operands())
+ if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg)
+ return Last;
+
+ return Def < 0 ? nullptr : getInstFromId(MBB, Def);
+}
+
+MachineInstr *ReachingDefAnalysis::getInstWithUseBefore(MachineInstr *MI,
+ int PhysReg) {
+ auto I = MachineBasicBlock::reverse_iterator(MI);
+ auto E = MI->getParent()->rend();
+  ++I;
+
+  for (; I != E; ++I)
+ for (auto &MO : I->operands())
+ if (MO.isReg() && MO.isUse() && MO.getReg() == PhysReg)
+ return &*I;
+
+ return nullptr;
+}
+
+void ReachingDefAnalysis::getAllInstWithUseBefore(MachineInstr *MI,
+ int PhysReg, SmallVectorImpl<MachineInstr*> &Uses) {
+ MachineInstr *Use = nullptr;
+ MachineInstr *Pos = MI;
+
+ while ((Use = getInstWithUseBefore(Pos, PhysReg))) {
+ Uses.push_back(Use);
+ Pos = Use;
+ }
+}
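
A sketch of how a client pass might drive the new queries (signatures as
added above; the surrounding pass boilerplate and includes are assumed):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/ReachingDefAnalysis.h"
  using namespace llvm;

  static void inspect(ReachingDefAnalysis &RDA, MachineInstr &MI,
                      int PhysReg) {
    // Find the def of PhysReg that reaches MI, then all of its local uses.
    if (MachineInstr *Def = RDA.getReachingMIDef(&MI, PhysReg)) {
      SmallVector<MachineInstr *, 4> Uses;
      RDA.getReachingLocalUses(Def, PhysReg, Uses);
      // A def with no remaining uses that is not live-out is a candidate
      // for removal in a client transformation.
      bool MaybeDead = Uses.empty() && !RDA.isReachingDefLiveOut(&MI, PhysReg);
      (void)MaybeDead;
    }
  }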
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 44d0233604e7..89b5bcebd61c 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -82,12 +83,12 @@ namespace {
/// Everything we know about a live virtual register.
struct LiveReg {
MachineInstr *LastUse = nullptr; ///< Last instr to use reg.
- unsigned VirtReg; ///< Virtual register number.
+ Register VirtReg; ///< Virtual register number.
MCPhysReg PhysReg = 0; ///< Currently held here.
unsigned short LastOpNum = 0; ///< OpNum on LastUse.
bool Dirty = false; ///< Register needs spill.
- explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {}
+ explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {}
unsigned getSparseSetIndex() const {
return Register::virtReg2Index(VirtReg);
@@ -128,7 +129,7 @@ namespace {
/// Maps each physical register to a RegState enum or a virtual register.
std::vector<unsigned> PhysRegState;
- SmallVector<unsigned, 16> VirtDead;
+ SmallVector<Register, 16> VirtDead;
SmallVector<MachineInstr *, 32> Coalesced;
using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>;
@@ -184,14 +185,14 @@ namespace {
void allocateInstruction(MachineInstr &MI);
void handleDebugValue(MachineInstr &MI);
void handleThroughOperands(MachineInstr &MI,
- SmallVectorImpl<unsigned> &VirtDead);
+ SmallVectorImpl<Register> &VirtDead);
bool isLastUseOfLocalReg(const MachineOperand &MO) const;
void addKillFlag(const LiveReg &LRI);
void killVirtReg(LiveReg &LR);
- void killVirtReg(unsigned VirtReg);
+ void killVirtReg(Register VirtReg);
void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
- void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
+ void spillVirtReg(MachineBasicBlock::iterator MI, Register VirtReg);
void usePhysReg(MachineOperand &MO);
void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
@@ -199,34 +200,34 @@ namespace {
unsigned calcSpillCost(MCPhysReg PhysReg) const;
void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
- LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
+ LiveRegMap::iterator findLiveVirtReg(Register VirtReg) {
return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
}
- LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
+ LiveRegMap::const_iterator findLiveVirtReg(Register VirtReg) const {
return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
}
- void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
+ void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint);
void allocVirtRegUndef(MachineOperand &MO);
- MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
- unsigned Hint);
- LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
- unsigned Hint);
+ MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+ Register Hint);
+ LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+ Register Hint);
void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut);
bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
- unsigned traceCopies(unsigned VirtReg) const;
- unsigned traceCopyChain(unsigned Reg) const;
+ Register traceCopies(Register VirtReg) const;
+ Register traceCopyChain(Register Reg) const;
- int getStackSpaceFor(unsigned VirtReg);
- void spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ int getStackSpaceFor(Register VirtReg);
+ void spill(MachineBasicBlock::iterator Before, Register VirtReg,
MCPhysReg AssignedReg, bool Kill);
- void reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
+ void reload(MachineBasicBlock::iterator Before, Register VirtReg,
MCPhysReg PhysReg);
- bool mayLiveOut(unsigned VirtReg);
- bool mayLiveIn(unsigned VirtReg);
+ bool mayLiveOut(Register VirtReg);
+ bool mayLiveIn(Register VirtReg);
void dumpState();
};
@@ -244,7 +245,7 @@ void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
/// This allocates space for the specified virtual register to be held on the
/// stack.
-int RegAllocFast::getStackSpaceFor(unsigned VirtReg) {
+int RegAllocFast::getStackSpaceFor(Register VirtReg) {
// Find the location Reg would belong...
int SS = StackSlotForVirtReg[VirtReg];
// Already has space allocated?
@@ -263,7 +264,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) {
}
/// Returns false if \p VirtReg is known to not live out of the current block.
-bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
+bool RegAllocFast::mayLiveOut(Register VirtReg) {
if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) {
// Cannot be live-out if there are no successors.
return !MBB->succ_empty();
@@ -292,7 +293,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
}
/// Returns false if \p VirtReg is known to not be live into the current block.
-bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
+bool RegAllocFast::mayLiveIn(Register VirtReg) {
if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
return !MBB->pred_empty();
@@ -311,7 +312,7 @@ bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
/// Insert spill instruction for \p AssignedReg before \p Before. Update
/// DBG_VALUEs with \p VirtReg operands with the stack slot.
-void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
+void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
MCPhysReg AssignedReg, bool Kill) {
LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI)
<< " in " << printReg(AssignedReg, TRI));
@@ -339,7 +340,7 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
}
/// Insert reload instruction for \p PhysReg before \p Before.
-void RegAllocFast::reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
+void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg,
MCPhysReg PhysReg) {
LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into "
<< printReg(PhysReg, TRI) << '\n');
@@ -393,7 +394,7 @@ void RegAllocFast::killVirtReg(LiveReg &LR) {
}
/// Mark virtreg as no longer available.
-void RegAllocFast::killVirtReg(unsigned VirtReg) {
+void RegAllocFast::killVirtReg(Register VirtReg) {
assert(Register::isVirtualRegister(VirtReg) &&
"killVirtReg needs a virtual register");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
@@ -404,7 +405,7 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) {
/// This method spills the value specified by VirtReg into the corresponding
/// stack slot if needed.
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
- unsigned VirtReg) {
+ Register VirtReg) {
assert(Register::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
@@ -456,7 +457,7 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
return;
Register PhysReg = MO.getReg();
- assert(Register::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand");
+ assert(PhysReg.isPhysical() && "Bad usePhysReg operand");
markRegUsedInInstr(PhysReg);
switch (PhysRegState[PhysReg]) {
@@ -520,7 +521,7 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
MCPhysReg PhysReg, RegState NewState) {
markRegUsedInInstr(PhysReg);
- switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ switch (Register VirtReg = PhysRegState[PhysReg]) {
case regDisabled:
break;
default:
@@ -536,7 +537,7 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
setPhysRegState(PhysReg, NewState);
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
MCPhysReg Alias = *AI;
- switch (unsigned VirtReg = PhysRegState[Alias]) {
+ switch (Register VirtReg = PhysRegState[Alias]) {
case regDisabled:
break;
default:
@@ -562,7 +563,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
<< " is already used in instr.\n");
return spillImpossible;
}
- switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ switch (Register VirtReg = PhysRegState[PhysReg]) {
case regDisabled:
break;
case regFree:
@@ -584,7 +585,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
unsigned Cost = 0;
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
MCPhysReg Alias = *AI;
- switch (unsigned VirtReg = PhysRegState[Alias]) {
+ switch (Register VirtReg = PhysRegState[Alias]) {
case regDisabled:
break;
case regFree:
@@ -608,7 +609,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
/// proper container for VirtReg now. The physical register must not be used
/// for anything else when this is called.
void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
- unsigned VirtReg = LR.VirtReg;
+ Register VirtReg = LR.VirtReg;
LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to "
<< printReg(PhysReg, TRI) << '\n');
assert(LR.PhysReg == 0 && "Already assigned a physreg");
@@ -621,13 +622,13 @@ static bool isCoalescable(const MachineInstr &MI) {
return MI.isFullCopy();
}
-unsigned RegAllocFast::traceCopyChain(unsigned Reg) const {
+Register RegAllocFast::traceCopyChain(Register Reg) const {
static const unsigned ChainLengthLimit = 3;
unsigned C = 0;
do {
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
return Reg;
- assert(Register::isVirtualRegister(Reg));
+ assert(Reg.isVirtual());
MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg);
if (!VRegDef || !isCoalescable(*VRegDef))
@@ -640,26 +641,26 @@ unsigned RegAllocFast::traceCopyChain(unsigned Reg) const {
/// Check if any of \p VirtReg's definitions is a copy. If it is follow the
/// chain of copies to check whether we reach a physical register we can
/// coalesce with.
-unsigned RegAllocFast::traceCopies(unsigned VirtReg) const {
+Register RegAllocFast::traceCopies(Register VirtReg) const {
static const unsigned DefLimit = 3;
unsigned C = 0;
for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) {
if (isCoalescable(MI)) {
Register Reg = MI.getOperand(1).getReg();
Reg = traceCopyChain(Reg);
- if (Reg != 0)
+ if (Reg.isValid())
return Reg;
}
if (++C >= DefLimit)
break;
}
- return 0;
+ return Register();
}
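
Why Register() replaces the literal 0 throughout this file (a standalone
sketch, assuming the llvm::Register API as of this change):

  #include "llvm/CodeGen/Register.h"
  using llvm::Register;

  static Register pickHint(Register Hint) {
    // A default-constructed Register is "no register": isValid() is the
    // typed replacement for the old "!= 0" checks.
    if (Hint.isValid() && Hint.isPhysical())
      return Hint;
    return Register();
  }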
/// Allocates a physical register for VirtReg.
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
- const unsigned VirtReg = LR.VirtReg;
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) {
+ const Register VirtReg = LR.VirtReg;
assert(Register::isVirtualRegister(VirtReg) &&
"Can only allocate virtual registers");
@@ -670,7 +671,7 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
<< " with hint " << printReg(Hint0, TRI) << '\n');
// Take hint when possible.
- if (Register::isPhysicalRegister(Hint0) && MRI->isAllocatable(Hint0) &&
+ if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) &&
RC.contains(Hint0)) {
// Ignore the hint if we would have to spill a dirty register.
unsigned Cost = calcSpillCost(Hint0);
@@ -686,12 +687,12 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
<< "occupied\n");
}
} else {
- Hint0 = 0;
+ Hint0 = Register();
}
// Try other hint.
- unsigned Hint1 = traceCopies(VirtReg);
- if (Register::isPhysicalRegister(Hint1) && MRI->isAllocatable(Hint1) &&
+ Register Hint1 = traceCopies(VirtReg);
+ if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) &&
RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) {
// Ignore the hint if we would have to spill a dirty register.
unsigned Cost = calcSpillCost(Hint1);
@@ -707,7 +708,7 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
<< "occupied\n");
}
} else {
- Hint1 = 0;
+ Hint1 = Register();
}
MCPhysReg BestReg = 0;
@@ -775,14 +776,14 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
/// Allocates a register for VirtReg and mark it as dirty.
MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
- unsigned VirtReg, unsigned Hint) {
+ Register VirtReg, Register Hint) {
assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
if (!LRI->PhysReg) {
// If there is no hint, peek at the only use of this register.
- if ((!Hint || !Register::isPhysicalRegister(Hint)) &&
+ if ((!Hint || !Hint.isPhysical()) &&
MRI->hasOneNonDBGUse(VirtReg)) {
const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg);
// It's a copy, use the destination register as a hint.
@@ -807,8 +808,8 @@ MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
/// Make sure VirtReg is available in a physreg and return it.
RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI,
unsigned OpNum,
- unsigned VirtReg,
- unsigned Hint) {
+ Register VirtReg,
+ Register Hint) {
assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
@@ -884,13 +885,13 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
// Handles special instruction operand like early clobbers and tied ops when
// there are additional physreg defines.
void RegAllocFast::handleThroughOperands(MachineInstr &MI,
- SmallVectorImpl<unsigned> &VirtDead) {
+ SmallVectorImpl<Register> &VirtDead) {
LLVM_DEBUG(dbgs() << "Scanning for through registers:");
- SmallSet<unsigned, 8> ThroughRegs;
+ SmallSet<Register, 8> ThroughRegs;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) ||
(MO.getSubReg() && MI.readsVirtualRegister(Reg))) {
@@ -905,7 +906,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
Register Reg = MO.getReg();
- if (!Reg || !Register::isPhysicalRegister(Reg))
+ if (!Reg || !Reg.isPhysical())
continue;
markRegUsedInInstr(Reg);
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
@@ -914,7 +915,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
}
}
- SmallVector<unsigned, 8> PartialDefs;
+ SmallVector<Register, 8> PartialDefs;
LLVM_DEBUG(dbgs() << "Allocating tied uses.\n");
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
MachineOperand &MO = MI.getOperand(I);
@@ -961,7 +962,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
Register Reg = MO.getReg();
- if (!Reg || !Register::isPhysicalRegister(Reg))
+ if (!Reg || !Reg.isPhysical())
continue;
LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI)
<< " as used in instr\n");
@@ -969,7 +970,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
}
// Also mark PartialDefs as used to avoid reallocation.
- for (unsigned PartialDef : PartialDefs)
+ for (Register PartialDef : PartialDefs)
markRegUsedInInstr(PartialDef);
}
@@ -1002,7 +1003,7 @@ void RegAllocFast::dumpState() {
e = LiveVirtRegs.end(); i != e; ++i) {
if (!i->PhysReg)
continue;
- assert(Register::isVirtualRegister(i->VirtReg) && "Bad map key");
+ assert(i->VirtReg.isVirtual() && "Bad map key");
assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value");
assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
}
@@ -1013,8 +1014,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
const MCInstrDesc &MCID = MI.getDesc();
// If this is a copy, we may be able to coalesce.
- unsigned CopySrcReg = 0;
- unsigned CopyDstReg = 0;
+ Register CopySrcReg;
+ Register CopyDstReg;
unsigned CopySrcSub = 0;
unsigned CopyDstSub = 0;
if (MI.isCopy()) {
@@ -1082,7 +1083,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
(hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
handleThroughOperands(MI, VirtDead);
// Don't attempt coalescing when we have funny stuff going on.
- CopyDstReg = 0;
+ CopyDstReg = Register();
// Pretend we have early clobbers so the use operands get marked below.
// This is not necessary for the common case of a single tied use.
hasEarlyClobbers = true;
@@ -1095,7 +1096,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
if (MO.isUse()) {
if (MO.isUndef()) {
@@ -1124,7 +1125,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
if (!MO.isReg() || !MO.isUse())
continue;
Register Reg = MO.getReg();
- if (!Register::isVirtualRegister(Reg))
+ if (!Reg.isVirtual())
continue;
assert(MO.isUndef() && "Should only have undef virtreg uses left");
@@ -1139,7 +1140,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
- if (!Reg || !Register::isPhysicalRegister(Reg))
+ if (!Reg || !Reg.isPhysical())
continue;
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MO.isTied()) continue;
@@ -1168,7 +1169,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
continue;
Register Reg = MO.getReg();
- if (!Reg || !Register::isPhysicalRegister(Reg) || !MRI->isAllocatable(Reg))
+ if (!Reg || !Reg.isPhysical() || !MRI->isAllocatable(Reg))
continue;
definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
}
@@ -1182,12 +1183,12 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
Register Reg = MO.getReg();
// We have already dealt with phys regs in the previous scan.
- if (Register::isPhysicalRegister(Reg))
+ if (Reg.isPhysical())
continue;
MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
VirtDead.push_back(Reg);
- CopyDstReg = 0; // cancel coalescing;
+ CopyDstReg = Register(); // cancel coalescing;
} else
CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0;
}
@@ -1196,7 +1197,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// register are allocated identically. We didn't need to do this for uses
   // because we are creating our own kill flags, and they are always at the
// last use.
- for (unsigned VirtReg : VirtDead)
+ for (Register VirtReg : VirtDead)
killVirtReg(VirtReg);
VirtDead.clear();
@@ -1234,7 +1235,7 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
// We can't allocate a physreg for a DebugValue, sorry!
LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
- MO.setReg(0);
+ MO.setReg(Register());
}
// If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so
@@ -1252,7 +1253,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MII = MBB.begin();
// Add live-in registers as live.
- for (const MachineBasicBlock::RegisterMaskPair LI : MBB.liveins())
+ for (const MachineBasicBlock::RegisterMaskPair &LI : MBB.liveins())
if (MRI->isAllocatable(LI.PhysReg))
definePhysReg(MII, LI.PhysReg, regReserved);
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index d27db678f02a..27de7fe45887 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -3126,6 +3126,11 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
+ // Tell LiveDebugVariables about the new ranges. Ranges not being covered by
+ // the new regs are kept in LDV (still mapping to the old register), until
+ // we rewrite spilled locations in LDV at a later stage.
+ DebugVars->splitRegister(VirtReg.reg, LRE.regs(), *LIS);
+
if (VerifyEnabled)
MF->verify(this, "After spilling");
}
@@ -3220,8 +3225,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
MF->getSubtarget().enableRALocalReassignment(
MF->getTarget().getOptLevel());
- EnableAdvancedRASplitCost = ConsiderLocalIntervalCost ||
- MF->getSubtarget().enableAdvancedRASplitCost();
+ EnableAdvancedRASplitCost =
+ ConsiderLocalIntervalCost.getNumOccurrences()
+ ? ConsiderLocalIntervalCost
+ : MF->getSubtarget().enableAdvancedRASplitCost();
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
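
The getNumOccurrences() pattern above lets an explicit command-line setting
win over the subtarget default; a minimal sketch of the same idiom (option
name hypothetical):

  #include "llvm/Support/CommandLine.h"

  static llvm::cl::opt<bool> MyTuning("my-tuning", llvm::cl::init(false));

  static bool resolveTuning(bool SubtargetDefault) {
    // Only trust the cl::opt if the user actually passed it.
    return MyTuning.getNumOccurrences() ? MyTuning : SubtargetDefault;
  }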
diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index 757ff0e44953..5a79ac44dcf4 100644
--- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -56,7 +56,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- // Call determineCalleeSaves and then also set the bits for subregs and
+ // Call getCalleeSaves and then also set the bits for subregs and
// fully saved superregs.
static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF);
@@ -199,7 +199,7 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
// Target will return the set of registers that it saves/restores as needed.
SavedRegs.clear();
- TFI.determineCalleeSaves(MF, SavedRegs);
+ TFI.getCalleeSaves(MF, SavedRegs);
if (SavedRegs.none())
return;
diff --git a/llvm/lib/CodeGen/RegisterClassInfo.cpp b/llvm/lib/CodeGen/RegisterClassInfo.cpp
index 530e0cccf1d4..1523bd4d1649 100644
--- a/llvm/lib/CodeGen/RegisterClassInfo.cpp
+++ b/llvm/lib/CodeGen/RegisterClassInfo.cpp
@@ -59,7 +59,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
if (Update || CSR != CalleeSavedRegs) {
// Build a CSRAlias map. Every CSR alias saves the last
// overlapping CSR.
- CalleeSavedAliases.resize(TRI->getNumRegs(), 0);
+ CalleeSavedAliases.assign(TRI->getNumRegs(), 0);
for (const MCPhysReg *I = CSR; *I; ++I)
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
CalleeSavedAliases[*AI] = *I;
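
A minimal repro of the resize/assign difference fixed above: resize
value-initializes only the new elements, so stale aliases from a previously
analyzed function would survive, while assign overwrites every element.

  #include <cassert>
  #include <vector>

  int main() {
    std::vector<unsigned> V = {7, 7}; // stale entries from a prior run
    V.resize(4, 0);
    assert(V[0] == 7); // resize keeps existing elements untouched
    V.assign(4, 0);
    assert(V[0] == 0); // assign clears everything
  }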
@@ -186,6 +186,7 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
NumRCUnits = NUnits;
}
}
+ assert(RC && "Failed to find register class");
compute(RC);
unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC);
return TRI->getRegPressureSetLimit(*MF, Idx) -
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 6ff5ddbc023d..a3f75d82d0ec 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -119,32 +120,46 @@ static cl::opt<unsigned> LargeIntervalFreqThreshold(
namespace {
+ class JoinVals;
+
class RegisterCoalescer : public MachineFunctionPass,
private LiveRangeEdit::Delegate {
- MachineFunction* MF;
- MachineRegisterInfo* MRI;
- const TargetRegisterInfo* TRI;
- const TargetInstrInfo* TII;
- LiveIntervals *LIS;
- const MachineLoopInfo* Loops;
- AliasAnalysis *AA;
+ MachineFunction* MF = nullptr;
+ MachineRegisterInfo* MRI = nullptr;
+ const TargetRegisterInfo* TRI = nullptr;
+ const TargetInstrInfo* TII = nullptr;
+ LiveIntervals *LIS = nullptr;
+ const MachineLoopInfo* Loops = nullptr;
+ AliasAnalysis *AA = nullptr;
RegisterClassInfo RegClassInfo;
+ /// Debug variable location tracking -- for each VReg, maintain an
+ /// ordered-by-slot-index set of DBG_VALUEs, to help quick
+ /// identification of whether coalescing may change location validity.
+ using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
+ DenseMap<unsigned, std::vector<DbgValueLoc>> DbgVRegToValues;
+
+ /// VRegs may be repeatedly coalesced, and have many DBG_VALUEs attached.
+ /// To avoid repeatedly merging sets of DbgValueLocs, instead record
+ /// which vregs have been coalesced, and where to. This map is from
+ /// vreg => {set of vregs merged in}.
+ DenseMap<unsigned, SmallVector<unsigned, 4>> DbgMergedVRegNums;
+
/// A LaneMask to remember on which subregister live ranges we need to call
/// shrinkToUses() later.
LaneBitmask ShrinkMask;
/// True if the main range of the currently coalesced intervals should be
/// checked for smaller live intervals.
- bool ShrinkMainRange;
+ bool ShrinkMainRange = false;
/// True if the coalescer should aggressively coalesce global copies
/// in favor of keeping local copies.
- bool JoinGlobalCopies;
+ bool JoinGlobalCopies = false;
/// True if the coalescer should aggressively coalesce fall-thru
/// blocks exclusively containing copies.
- bool JoinSplitEdges;
+ bool JoinSplitEdges = false;
/// Copy instructions yet to be coalesced.
SmallVector<MachineInstr*, 8> WorkList;
@@ -225,7 +240,8 @@ namespace {
/// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
/// lanemasks already adjusted to the coalesced register.
void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
- LaneBitmask LaneMask, CoalescerPair &CP);
+ LaneBitmask LaneMask, CoalescerPair &CP,
+ unsigned DstIdx);
/// Join the liveranges of two subregisters. Joins @p RRange into
/// @p LRange, @p RRange may be invalid afterwards.
@@ -325,6 +341,19 @@ namespace {
MI->eraseFromParent();
}
+ /// Walk over function and initialize the DbgVRegToValues map.
+ void buildVRegToDbgValueMap(MachineFunction &MF);
+
+ /// Test whether, after merging, any DBG_VALUEs would refer to a
+ /// different value number than before merging, and whether this can
+ /// be resolved. If not, mark the DBG_VALUE as being undef.
+ void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS,
+ JoinVals &LHSVals, LiveRange &RHS,
+ JoinVals &RHSVals);
+
+ void checkMergingChangesDbgValuesImpl(unsigned Reg, LiveRange &OtherRange,
+ LiveRange &RegRange, JoinVals &Vals2);
+
public:
static char ID; ///< Class identification, replacement for typeinfo
@@ -1648,8 +1677,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
}
-void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
- unsigned DstReg,
+void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg,
unsigned SubIdx) {
bool DstIsPhys = Register::isPhysicalRegister(DstReg);
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
@@ -1705,8 +1733,15 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
if (!DstInt->hasSubRanges()) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
- DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
+ LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
+ DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
+ // The unused lanes are just empty live-ranges at this point.
+        // It is the caller's responsibility to set the proper
+ // dead segments if there is an actual dead def of the
+ // unused lanes. This may happen with rematerialization.
+ DstInt->createSubRange(Allocator, UnusedLanes);
}
SlotIndex MIIdx = UseMI->isDebugValue()
? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
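
Illustrative lane arithmetic for the used/unused split above (mask values
hypothetical; the real code uses LaneBitmask):

  #include <cassert>

  int main() {
    unsigned FullMask = 0b1111;  // getMaxLaneMaskForVReg, say four lanes
    unsigned UsedLanes = 0b0011; // getSubRegIndexLaneMask(SubIdx)
    unsigned UnusedLanes = FullMask & ~UsedLanes;
    // The used lanes get a subrange seeded from the main range; the unused
    // lanes get an empty subrange that the caller may later populate with
    // dead defs (e.g. after rematerialization).
    assert(UnusedLanes == 0b1100);
  }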
@@ -2195,6 +2230,7 @@ class JoinVals {
/// NewVNInfo. This is suitable for passing to LiveInterval::join().
SmallVector<int, 8> Assignments;
+ public:
/// Conflict resolution for overlapping values.
enum ConflictResolution {
/// No overlap, simply keep this value.
@@ -2223,6 +2259,7 @@ class JoinVals {
CR_Impossible
};
+ private:
/// Per-value info for LI. The lane bit masks are all relative to the final
/// joined register, so they can be compared directly between SrcReg and
/// DstReg.
@@ -2383,6 +2420,11 @@ public:
/// Get the value assignments suitable for passing to LiveInterval::join.
const int *getAssignments() const { return Assignments.data(); }
+
+ /// Get the conflict resolution for a value number.
+ ConflictResolution getResolution(unsigned Num) const {
+ return Vals[Num].Resolution;
+ }
};
} // end anonymous namespace
@@ -3115,7 +3157,8 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
LiveInterval *LI) {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
// Get the def location before markUnused() below invalidates it.
- SlotIndex Def = LR.getValNumInfo(i)->def;
+ VNInfo *VNI = LR.getValNumInfo(i);
+ SlotIndex Def = VNI->def;
switch (Vals[i].Resolution) {
case CR_Keep: {
// If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
@@ -3131,8 +3174,6 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
// In such cases, removing this def from the main range must be
// complemented by extending the main range to account for the liveness
// of the other subrange.
- VNInfo *VNI = LR.getValNumInfo(i);
- SlotIndex Def = VNI->def;
// The new end point of the main range segment to be extended.
SlotIndex NewEnd;
if (LI != nullptr) {
@@ -3272,7 +3313,8 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
const LiveRange &ToMerge,
LaneBitmask LaneMask,
- CoalescerPair &CP) {
+ CoalescerPair &CP,
+ unsigned ComposeSubRegIdx) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
LI.refineSubRanges(
Allocator, LaneMask,
@@ -3285,7 +3327,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
}
},
- *LIS->getSlotIndexes(), *TRI);
+ *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx);
}
bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
@@ -3351,12 +3393,12 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
if (!RHS.hasSubRanges()) {
LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
: TRI->getSubRegIndexLaneMask(SrcIdx);
- mergeSubRangeInto(LHS, RHS, Mask, CP);
+ mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx);
} else {
// Pair up subranges and merge.
for (LiveInterval::SubRange &R : RHS.subranges()) {
LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
- mergeSubRangeInto(LHS, R, Mask, CP);
+ mergeSubRangeInto(LHS, R, Mask, CP, DstIdx);
}
}
LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
@@ -3385,6 +3427,9 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
while (!ShrinkRegs.empty())
shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
+ // Scan and mark undef any DBG_VALUEs that would refer to a different value.
+ checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals);
+
// Join RHS into LHS.
LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
@@ -3416,6 +3461,140 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
}
+void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
+{
+ const SlotIndexes &Slots = *LIS->getSlotIndexes();
+ SmallVector<MachineInstr *, 8> ToInsert;
+
+ // After collecting a block of DBG_VALUEs into ToInsert, enter them into the
+ // vreg => DbgValueLoc map.
+ auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
+ for (auto *X : ToInsert)
+ DbgVRegToValues[X->getOperand(0).getReg()].push_back({Slot, X});
+
+ ToInsert.clear();
+ };
+
+ // Iterate over all instructions, collecting them into the ToInsert vector.
+ // Once a non-debug instruction is found, record the slot index of the
+ // collected DBG_VALUEs.
+ for (auto &MBB : MF) {
+ SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB);
+
+ for (auto &MI : MBB) {
+ if (MI.isDebugValue() && MI.getOperand(0).isReg() &&
+ MI.getOperand(0).getReg().isVirtual()) {
+ ToInsert.push_back(&MI);
+ } else if (!MI.isDebugInstr()) {
+ CurrentSlot = Slots.getInstructionIndex(MI);
+ CloseNewDVRange(CurrentSlot);
+ }
+ }
+
+ // Close range of DBG_VALUEs at the end of blocks.
+ CloseNewDVRange(Slots.getMBBEndIdx(&MBB));
+ }
+
+ // Sort all DBG_VALUEs we've seen by slot number.
+ for (auto &Pair : DbgVRegToValues)
+ llvm::sort(Pair.second);
+}
+
+void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
+ LiveRange &LHS,
+ JoinVals &LHSVals,
+ LiveRange &RHS,
+ JoinVals &RHSVals) {
+ auto ScanForDstReg = [&](unsigned Reg) {
+ checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals);
+ };
+
+ auto ScanForSrcReg = [&](unsigned Reg) {
+ checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
+ };
+
+  // Scan for potentially unsound DBG_VALUEs: first examine the register number
+ // Reg, and then any other vregs that may have been merged into it.
+ auto PerformScan = [this](unsigned Reg, std::function<void(unsigned)> Func) {
+ Func(Reg);
+ if (DbgMergedVRegNums.count(Reg))
+ for (unsigned X : DbgMergedVRegNums[Reg])
+ Func(X);
+ };
+
+ // Scan for unsound updates of both the source and destination register.
+ PerformScan(CP.getSrcReg(), ScanForSrcReg);
+ PerformScan(CP.getDstReg(), ScanForDstReg);
+}
+
+void RegisterCoalescer::checkMergingChangesDbgValuesImpl(unsigned Reg,
+ LiveRange &OtherLR,
+ LiveRange &RegLR,
+ JoinVals &RegVals) {
+ // Are there any DBG_VALUEs to examine?
+ auto VRegMapIt = DbgVRegToValues.find(Reg);
+ if (VRegMapIt == DbgVRegToValues.end())
+ return;
+
+ auto &DbgValueSet = VRegMapIt->second;
+ auto DbgValueSetIt = DbgValueSet.begin();
+ auto SegmentIt = OtherLR.begin();
+
+ bool LastUndefResult = false;
+ SlotIndex LastUndefIdx;
+
+ // If the "Other" register is live at a slot Idx, test whether Reg can
+ // safely be merged with it, or should be marked undef.
+ auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult,
+ &LastUndefIdx](SlotIndex Idx) -> bool {
+ // Our worst-case performance typically happens with asan, which produces
+ // very many DBG_VALUEs of the same location. Cache the most recent result
+ // for this edge case.
+ if (LastUndefIdx == Idx)
+ return LastUndefResult;
+
+ // If the other range was live, and Reg's was not, the register coalescer
+ // will not have tried to resolve any conflicts. We don't know whether
+ // the DBG_VALUE will refer to the same value number, so it must be made
+ // undef.
+ auto OtherIt = RegLR.find(Idx);
+ if (OtherIt == RegLR.end())
+ return true;
+
+ // Both registers were live: examine the conflict resolution record for
+ // the value number Reg refers to. CR_Keep means that this value number
+ // "won" and the merged register definitely refers to that value. CR_Erase
+ // means the value number was a redundant copy of the other value, which
+ // was coalesced away and Reg deleted. It's safe to refer to the other
+ // register (which will be the source of the copy).
+ auto Resolution = RegVals.getResolution(OtherIt->valno->id);
+ LastUndefResult = Resolution != JoinVals::CR_Keep &&
+ Resolution != JoinVals::CR_Erase;
+ LastUndefIdx = Idx;
+ return LastUndefResult;
+ };
+
+ // Iterate over both the live-range of the "Other" register, and the set of
+ // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest
+ // slot index. This relies on the DbgValueSet being ordered.
+ while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) {
+ if (DbgValueSetIt->first < SegmentIt->end) {
+ // "Other" is live and there is a DBG_VALUE of Reg: test if we should
+ // set it undef.
+ if (DbgValueSetIt->first >= SegmentIt->start &&
+ DbgValueSetIt->second->getOperand(0).getReg() != 0 &&
+ ShouldUndef(DbgValueSetIt->first)) {
+ // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
+ DbgValueSetIt->second->getOperand(0).setReg(0);
+ continue;
+ }
+ ++DbgValueSetIt;
+ } else {
+ ++SegmentIt;
+ }
+ }
+}
+
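The loop above is a standard sorted-merge sweep: both sequences are ordered by slot index, so a single pass over the DBG_VALUE list and the live segments finds every overlap. A compact standalone model of the same control flow (ints as slot indices; names invented):

    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      // Sorted DBG_VALUE slots for Reg, and sorted [start, end) segments in
      // which the other register is live.
      std::vector<int> DbgSlots = {1, 4, 9, 12};
      std::vector<std::pair<int, int>> LiveSegs = {{3, 6}, {10, 14}};

      auto DV = DbgSlots.begin();
      auto Seg = LiveSegs.begin();
      while (DV != DbgSlots.end() && Seg != LiveSegs.end()) {
        if (*DV < Seg->second) {      // DBG_VALUE before this segment ends
          if (*DV >= Seg->first)      // ...and after it starts: overlap
            std::printf("slot %d lies in [%d, %d)\n", *DV, Seg->first,
                        Seg->second);
          ++DV;
        } else {
          ++Seg;                      // segment exhausted, advance it
        }
      }
    }

This prints slots 4 and 12; in the pass those are the DBG_VALUEs handed to ShouldUndef.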
namespace {
/// Information concerning MBB coalescing priority.
@@ -3698,6 +3877,10 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
if (VerifyCoalescing)
MF->verify(this, "Before register coalescing");
+ DbgVRegToValues.clear();
+ DbgMergedVRegNums.clear();
+ buildVRegToDbgValueMap(fn);
+
RegClassInfo.runOnMachineFunction(fn);
// Join (coalesce) intervals if requested.
diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index ec0868acab38..a5bea1463468 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -220,8 +221,8 @@ void RegScavenger::forward() {
// Ideally we would like a way to model this, but leaving the
// insert_subreg around causes both correctness and performance issues.
bool SubUsed = false;
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- if (isRegUsed(*SubRegs)) {
+ for (const MCPhysReg &SubReg : TRI->subregs(Reg))
+ if (isRegUsed(SubReg)) {
SubUsed = true;
break;
}
diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index e3f5abb6301f..4ee28d6bbb46 100644
--- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
index 019de6554d2a..0f73973c8a51 100644
--- a/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -17,9 +17,10 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp
index ddbbd0f8d6e9..8aa488e63913 100644
--- a/llvm/lib/CodeGen/SafeStack.cpp
+++ b/llvm/lib/CodeGen/SafeStack.cpp
@@ -28,7 +28,6 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -54,6 +53,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
@@ -63,6 +63,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -562,7 +563,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
for (Argument *Arg : ByValArguments) {
unsigned Offset = SSL.getObjectOffset(Arg);
- unsigned Align = SSL.getObjectAlignment(Arg);
+ MaybeAlign Align(SSL.getObjectAlignment(Arg));
Type *Ty = Arg->getType()->getPointerElementType();
uint64_t Size = DL.getTypeStoreSize(Ty);
@@ -579,7 +580,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
DIExpression::ApplyOffset, -Offset);
Arg->replaceAllUsesWith(NewArg);
IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
- IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlignment(), Size);
+ IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlign(), Size);
}
// Allocate space for every unsafe static AllocaInst on the unsafe stack.
diff --git a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index b4037499d7d1..ee72de67d875 100644
--- a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include <algorithm>
@@ -848,21 +849,20 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
bool &ModifiedDT) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II) {
+ unsigned Alignment;
switch (II->getIntrinsicID()) {
default:
break;
case Intrinsic::masked_load: {
// Scalarize unsupported vector masked load
- unsigned Alignment =
- cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ Alignment = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
if (TTI->isLegalMaskedLoad(CI->getType(), MaybeAlign(Alignment)))
return false;
scalarizeMaskedLoad(CI, ModifiedDT);
return true;
}
case Intrinsic::masked_store: {
- unsigned Alignment =
- cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ Alignment = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(),
MaybeAlign(Alignment)))
return false;
@@ -870,12 +870,15 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
return true;
}
case Intrinsic::masked_gather:
- if (TTI->isLegalMaskedGather(CI->getType()))
+ Alignment = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ if (TTI->isLegalMaskedGather(CI->getType(), MaybeAlign(Alignment)))
return false;
scalarizeMaskedGather(CI, ModifiedDT);
return true;
case Intrinsic::masked_scatter:
- if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType()))
+ Alignment = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType(),
+ MaybeAlign(Alignment)))
return false;
scalarizeMaskedScatter(CI, ModifiedDT);
return true;
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 96a1f86c3e04..d11406cc330f 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -270,8 +270,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
RegUse, UseOp));
ST.adjustSchedDependency(SU, UseSU, Dep);
- } else
+ } else {
Dep.setLatency(0);
+ // FIXME: We could always let the target call adjustSchedDependency() and
+ // remove this condition, but that currently asserts in Hexagon BE.
+ if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle()))
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ }
UseSU->addPred(Dep);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e8950b58d42d..e5bc08b9280a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -131,6 +131,7 @@ namespace {
const TargetLowering &TLI;
CombineLevel Level;
CodeGenOpt::Level OptLevel;
+ bool LegalDAG = false;
bool LegalOperations = false;
bool LegalTypes = false;
bool ForCodeSize;
@@ -179,6 +180,12 @@ namespace {
AddToWorklist(Node);
}
+ /// Convenient shorthand to add a node and all of its users to the worklist.
+ void AddToWorklistWithUsers(SDNode *N) {
+ AddUsersToWorklist(N);
+ AddToWorklist(N);
+ }
+
// Prune potentially dangling nodes. This is called after
// any visit to a node, but should also be called during a visit after any
// failed combine which may have created a DAG node.
@@ -217,14 +224,16 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), AA(AA) {
- ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+ ForCodeSize = DAG.shouldOptForSize();
MaximumLegalStoreInBits = 0;
+ // We use the minimum store size here, since that's all we can guarantee
+ // for the scalable vector types.
for (MVT VT : MVT::all_valuetypes())
if (EVT(VT).isSimple() && VT != MVT::Other &&
TLI.isTypeLegal(EVT(VT)) &&
- VT.getSizeInBits() >= MaximumLegalStoreInBits)
- MaximumLegalStoreInBits = VT.getSizeInBits();
+ VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
}
void ConsiderForPruning(SDNode *N) {
@@ -622,7 +631,7 @@ namespace {
ConstantSDNode *Mask, SDNode *&NodeToMask);
/// Attempt to propagate a given AND node back to load leaves so that they
/// can be combined into narrow loads.
- bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
+ bool BackwardsPropagateMask(SDNode *N);
/// Helper function for MergeConsecutiveStores which merges the
/// component store chains.
@@ -1026,8 +1035,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
// Push the new node and any (possibly new) users onto the worklist.
- AddToWorklist(TLO.New.getNode());
- AddUsersToWorklist(TLO.New.getNode());
+ AddToWorklistWithUsers(TLO.New.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
@@ -1393,6 +1401,7 @@ bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
void DAGCombiner::Run(CombineLevel AtLevel) {
// set the instance variables, so that the various visit routines may use it.
Level = AtLevel;
+ LegalDAG = Level >= AfterLegalizeDAG;
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
@@ -1419,14 +1428,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// If this combine is running after legalizing the DAG, re-legalize any
// nodes pulled off the worklist.
- if (Level == AfterLegalizeDAG) {
+ if (LegalDAG) {
SmallSetVector<SDNode *, 16> UpdatedNodes;
bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
- for (SDNode *LN : UpdatedNodes) {
- AddUsersToWorklist(LN);
- AddToWorklist(LN);
- }
+ for (SDNode *LN : UpdatedNodes)
+ AddToWorklistWithUsers(LN);
+
if (!NIsValid)
continue;
}
@@ -2800,6 +2808,96 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
return SDValue();
}
+// If we are facing some sort of diamond carry/borrow in/out pattern, try to
+// match patterns like:
+//
+//          (uaddo A, B)            CarryIn
+//            |  \                     |
+//            |   \                    |
+//    PartialSum   PartialCarryOutX   /
+//            |        |             /
+//            |    ____|____________/
+//            |   /    |
+//     (uaddo *, *)    \________
+//       |  \                   \
+//       |   \                   |
+//       |    PartialCarryOutY   |
+//       |         \             |
+//       |          \           /
+//   AddCarrySum    |  ______/
+//                  | /
+//   CarryOut = (or *, *)
+//
+// And generate ADDCARRY (or SUBCARRY) with two result values:
+//
+// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
+//
+// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
+// a single path for carry/borrow out propagation:
+static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
+ const TargetLowering &TLI, SDValue Carry0,
+ SDValue Carry1, SDNode *N) {
+ if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
+ return SDValue();
+ unsigned Opcode = Carry0.getOpcode();
+ if (Opcode != Carry1.getOpcode())
+ return SDValue();
+ if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
+ return SDValue();
+
+ // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
+ // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
+ // the above ASCII art.)
+ if (Carry1.getOperand(0) != Carry0.getValue(0) &&
+ Carry1.getOperand(1) != Carry0.getValue(0))
+ std::swap(Carry0, Carry1);
+ if (Carry1.getOperand(0) != Carry0.getValue(0) &&
+ Carry1.getOperand(1) != Carry0.getValue(0))
+ return SDValue();
+
+ // The carry in value must be on the righthand side for subtraction.
+ unsigned CarryInOperandNum =
+ Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
+ if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
+ return SDValue();
+ SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
+
+ unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
+ if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
+ return SDValue();
+
+ // Verify that the carry/borrow in is plausibly a carry/borrow bit.
+ // TODO: make getAsCarry() aware of how partial carries are merged.
+ if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ CarryIn = CarryIn.getOperand(0);
+ if (CarryIn.getValueType() != MVT::i1)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Merged =
+ DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
+ Carry0.getOperand(1), CarryIn);
+
+ // Because the result of the UADDO/USUBO of A and B feeds into the
+ // UADDO/USUBO that consumes the carry/borrow in, if the first UADDO/USUBO
+ // overflows, the second one cannot. For example, consider 8-bit numbers
+ // where 0xFF is the maximum value.
+ //
+ // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
+ // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
+ //
+ // This is important because it means that OR and XOR can be used to merge
+ // carry flags, and that AND can return a constant zero.
+ //
+ // TODO: match other operations that can merge flags (ADD, etc)
+ DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
+ if (N->getOpcode() == ISD::AND)
+ return DAG.getConstant(0, DL, MVT::i1);
+ return Merged.getValue(1);
+}
+
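The exhaustive 8-bit version of the overflow argument above, as a standalone check (not LLVM code): when the first uaddo carries, its partial sum is at most 0xFE, so adding a one-bit carry-in cannot carry again, which is exactly why OR and XOR agree on the merged carry and AND of the two partial carries is always zero.

    #include <cassert>

    int main() {
      for (unsigned A = 0; A <= 0xFF; ++A)
        for (unsigned B = 0; B <= 0xFF; ++B)
          for (unsigned CarryIn = 0; CarryIn <= 1; ++CarryIn) {
            unsigned PartialSum = (A + B) & 0xFF;
            unsigned CarryOutX = (A + B) >> 8;                // first uaddo
            unsigned CarryOutY = (PartialSum + CarryIn) >> 8; // second uaddo
            assert(!(CarryOutX && CarryOutY));                // never both set
            assert((CarryOutX | CarryOutY) == (CarryOutX ^ CarryOutY));
          }
    }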
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N) {
// fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
@@ -3006,6 +3104,20 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
N1.getOperand(0)));
+ // A - (A & B) -> A & (~B)
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue A = N1.getOperand(0);
+ SDValue B = N1.getOperand(1);
+ if (A != N0)
+ std::swap(A, B);
+ if (A == N0 &&
+ (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
+ SDValue InvB =
+ DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::AND, DL, VT, A, InvB);
+ }
+ }
+
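The new A - (A & B) fold is a pure bit identity: A & B has no bits outside A, so the subtraction never borrows and simply clears those bits. A brute-force check (standalone, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // A - (A & B) == A & ~B: the subtrahend is a subset of A's bits.
      for (uint32_t A = 0; A < 256; ++A)
        for (uint32_t B = 0; B < 256; ++B)
          assert(A - (A & B) == (A & ~B));
    }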
// fold (X - (-Y * Z)) -> (X + (Y * Z))
if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
if (N1.getOperand(0).getOpcode() == ISD::SUB &&
@@ -4225,7 +4337,6 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
// If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
unsigned Opcode = N->getOpcode();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegal(Opcode, VT) &&
(N0.isUndef() || DAG.SignBitIsZero(N0)) &&
(N1.isUndef() || DAG.SignBitIsZero(N1))) {
@@ -4543,8 +4654,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
// (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
// (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
if (LL == RL && LR == RR) {
- ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
- : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
+ ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
+ : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
if (NewCC != ISD::SETCC_INVALID &&
(!LegalOperations ||
(TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
@@ -4856,7 +4967,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
return true;
}
-bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
+bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!Mask)
return false;
@@ -5092,6 +5203,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Shuffle = XformToShuffleWithZero(N))
return Shuffle;
+ if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+ return Combined;
+
// fold (and (or x, C), D) -> D if (C & D) == D
auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
@@ -5238,14 +5352,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- if (Level >= AfterLegalizeTypes) {
+ if (LegalTypes) {
// Attempt to propagate the AND back up to the leaves which, if they're
// loads, can be combined to narrow loads and the AND node can be removed.
// Perform after legalization so that extend nodes will already be
// combined into the loads.
- if (BackwardsPropagateMask(N, DAG)) {
+ if (BackwardsPropagateMask(N))
return SDValue(N, 0);
- }
}
if (SDValue Combined = visitANDLike(N0, N1, N))
@@ -5787,6 +5900,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue Combined = visitORLike(N0, N1, N))
return Combined;
+ if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+ return Combined;
+
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
return BSwap;
@@ -6418,7 +6534,7 @@ static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
// Check if the bytes offsets we are looking at match with either big or
// little endian value loaded. Return true for big endian, false for little
// endian, and None if match failed.
-static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
+static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
int64_t FirstOffset) {
// The endian can be decided only when it is 2 bytes at least.
unsigned Width = ByteOffsets.size();
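The check reduces to whether the collected offsets count up or down from the first one. A simplified standalone version of the same test (std::optional in place of llvm::Optional; function name invented):

    #include <cstdint>
    #include <optional>
    #include <vector>

    // Offsets {O, O+1, ..., O+W-1} mean little-endian, {O+W-1, ..., O} mean
    // big-endian; anything else is not a recognizable single-value load.
    static std::optional<bool>
    isBigEndianOffsets(const std::vector<int64_t> &Offsets, int64_t First) {
      unsigned Width = Offsets.size();
      if (Width < 2)
        return std::nullopt;
      bool Big = true, Little = true;
      for (unsigned i = 0; i < Width; ++i) {
        Little &= Offsets[i] == First + i;
        Big &= Offsets[i] == First + Width - 1 - i;
      }
      if (Big == Little)
        return std::nullopt;
      return Big;
    }

    int main() {
      // {4,5,6,7} relative to base 4: little-endian, so returns false.
      return *isBigEndianOffsets({4, 5, 6, 7}, 4) ? 1 : 0;
    }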
@@ -6491,7 +6607,6 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
return SDValue();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
return SDValue();
@@ -6499,7 +6614,7 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
// to the same base address. Collect bytes offsets from Base address into
// ByteOffsets.
SDValue CombinedValue;
- SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
+ SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
int64_t FirstOffset = INT64_MAX;
StoreSDNode *FirstStore = nullptr;
Optional<BaseIndexOffset> Base;
@@ -6655,13 +6770,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
unsigned ByteWidth = VT.getSizeInBits() / 8;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // Before legalize we can introduce too wide illegal loads which will be later
- // split into legal sized loads. This enables us to combine i64 load by i8
- // patterns to a couple of i32 loads on 32 bit targets.
- if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
- return SDValue();
-
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
auto MemoryByteOffset = [&] (ByteProvider P) {
assert(P.isMemory() && "Must be a memory byte provider");
@@ -6683,12 +6791,22 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Check if all the bytes of the OR we are looking at are loaded from the same
// base address. Collect bytes offsets from Base address in ByteOffsets.
- SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
- for (unsigned i = 0; i < ByteWidth; i++) {
+ SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
+ unsigned ZeroExtendedBytes = 0;
+ for (int i = ByteWidth - 1; i >= 0; --i) {
auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
- if (!P || !P->isMemory()) // All the bytes must be loaded from memory
+ if (!P)
return SDValue();
+ if (P->isConstantZero()) {
+ // It's OK for the N most significant bytes to be 0; we can just
+ // zero-extend the load.
+ if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
+ return SDValue();
+ continue;
+ }
+ assert(P->isMemory() && "provenance should either be memory or zero");
+
LoadSDNode *L = P->Load;
assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
!L->isIndexed() &&
@@ -6727,9 +6845,26 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
assert(Base && "Base address of the accessed memory location must be set");
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+ bool NeedsZext = ZeroExtendedBytes > 0;
+
+ EVT MemVT =
+ EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
+
+ if (!MemVT.isSimple())
+ return SDValue();
+
+ // Before legalization we can introduce too-wide illegal loads, which will
+ // later be split into legal-sized loads. This enables us to combine
+ // i64-load-by-i8 patterns into a couple of i32 loads on 32-bit targets.
+ if (LegalOperations &&
+ !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
+ MemVT))
+ return SDValue();
+
// Check if the bytes of the OR we are looking at match with either big or
// little endian value load
- Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+ Optional<bool> IsBigEndian = isBigEndian(
+ makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
if (!IsBigEndian.hasValue())
return SDValue();
@@ -6742,7 +6877,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
LoadSDNode *FirstLoad = FirstByteProvider->Load;
// The node we are looking at matches with the pattern, check if we can
- // replace it with a single load and bswap if needed.
+ // replace it with a single (possibly zero-extended) load and bswap + shift if
+ // needed.
// If the load needs byte swap check if the target supports it
bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
@@ -6750,25 +6886,45 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Before legalize we can introduce illegal bswaps which will be later
// converted to an explicit bswap sequence. This way we end up with a single
// load and byte shuffling instead of several loads and byte shuffling.
- if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ // We do not introduce illegal bswaps when zero-extending as this tends to
+ // introduce too many arithmetic instructions.
+ if (NeedsBswap && (LegalOperations || NeedsZext) &&
+ !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // If we need to bswap and zero extend, we have to insert a shift. Check that
+ // it is legal.
+ if (NeedsBswap && NeedsZext && LegalOperations &&
+ !TLI.isOperationLegal(ISD::SHL, VT))
return SDValue();
// Check that a load of the wide type is both allowed and fast on the target
bool Fast = false;
- bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- VT, *FirstLoad->getMemOperand(), &Fast);
+ bool Allowed =
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ *FirstLoad->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
- SDValue NewLoad =
- DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
+ SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
+ SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), MemVT,
+ FirstLoad->getAlignment());
// Transfer chain users from old loads to the new load.
for (LoadSDNode *L : Loads)
DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
- return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
+ if (!NeedsBswap)
+ return NewLoad;
+
+ SDValue ShiftedLoad =
+ NeedsZext
+ ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
+ DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
+ SDLoc(N), LegalOperations))
+ : NewLoad;
+ return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
}
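In C terms, the new capability is: an i32 assembled from fewer than four loaded bytes whose top bytes are provably zero now becomes one zero-extended narrow load (plus a bswap and shift when the byte order is reversed). A sketch of the arithmetic being relied on, assuming a little-endian host so the memcpy load matches the OR-of-bytes order:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Bytes 2 and 3 of the i32 are constant zero, so the OR-of-loaded-bytes
    // equals a single zero-extended 16-bit load.
    static uint32_t orOfBytes(const uint8_t *p) {
      return (uint32_t)p[0] | ((uint32_t)p[1] << 8);
    }

    static uint32_t zextLoad16(const uint8_t *p) {
      uint16_t v;
      std::memcpy(&v, p, sizeof v); // little-endian host assumed
      return v;                     // zero-extends to 32 bits
    }

    int main() {
      uint8_t buf[2] = {0x34, 0x12};
      assert(orOfBytes(buf) == zextLoad16(buf)); // both 0x1234
    }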
// If the target has andn, bsl, or a similar bit-select instruction,
@@ -6904,7 +7060,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue LHS, RHS, CC;
if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
- LHS.getValueType().isInteger());
+ LHS.getValueType());
if (!LegalOperations ||
TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
switch (N0Opcode) {
@@ -6964,6 +7120,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
DAG.getAllOnesConstant(DL, VT));
}
+ // fold (not (add X, -1)) -> (neg X)
+ if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
+ isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+ }
+
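The fold is the usual two's complement identity: since -X == ~X + 1, we get ~(X - 1) == -X, so the not-of-decrement collapses to a negation. Standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      // not (add X, -1) --> neg X; adding -1 is adding UINT32_MAX mod 2^32.
      for (uint32_t X = 0; X < 1000; ++X)
        assert(~(X + UINT32_MAX) == (uint32_t)0 - X);
    }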
// fold (xor (and x, y), y) -> (and (not x), y)
if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
SDValue X = N0.getOperand(0);
@@ -7051,6 +7214,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+ return Combined;
+
return SDValue();
}
@@ -7567,8 +7733,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (VT.isVector())
ExtVT = EVT::getVectorVT(*DAG.getContext(),
ExtVT, VT.getVectorNumElements());
- if ((!LegalOperations ||
- TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+ if (!LegalOperations ||
+ TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
+ TargetLowering::Legal)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
N0.getOperand(0), DAG.getValueType(ExtVT));
}
@@ -7776,26 +7943,40 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
}
- // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
- // TODO - support non-uniform vector shift amounts.
if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
- if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
+ SDValue InnerShift = N0.getOperand(0);
+ // TODO - support non-uniform vector shift amounts.
+ if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
uint64_t c1 = N001C->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
- EVT InnerShiftVT = N0.getOperand(0).getValueType();
- EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
+ EVT InnerShiftVT = InnerShift.getValueType();
+ EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+ // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
// This is only valid if the OpSizeInBits + c1 = size of inner shift.
if (c1 + OpSizeInBits == InnerShiftSize) {
- SDLoc DL(N0);
+ SDLoc DL(N);
if (c1 + c2 >= InnerShiftSize)
return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(ISD::SRL, DL, InnerShiftVT,
- N0.getOperand(0).getOperand(0),
- DAG.getConstant(c1 + c2, DL,
- ShiftCountVT)));
+ SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
+ SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+ InnerShift.getOperand(0), NewShiftAmt);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
+ }
+ // In the more general case, we can clear the high bits after the shift:
+ // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
+ if (N0.hasOneUse() && InnerShift.hasOneUse() &&
+ c1 + c2 < InnerShiftSize) {
+ SDLoc DL(N);
+ SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
+ SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+ InnerShift.getOperand(0), NewShiftAmt);
+ SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
+ OpSizeInBits - c2),
+ DL, InnerShiftVT);
+ SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
}
}
}
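The "more general case" in plain integer arithmetic: two right shifts with a truncate in between equal one wider shift by c1+c2 followed by masking to the low OpSizeInBits-c2 bits and truncating. A brute-force check for 64-to-32-bit truncation (standalone sketch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, c1+c2), Mask)
      // with OpSizeInBits = 32 and Mask covering the low (32 - c2) bits.
      for (uint64_t x : {0x0123456789ABCDEFull, ~0ull, 1ull << 63})
        for (unsigned c1 = 0; c1 < 32; ++c1)
          for (unsigned c2 = 1; c2 < 32; ++c2) {
            uint32_t twoShifts = (uint32_t)(x >> c1) >> c2;
            uint64_t mask = (1ull << (32 - c2)) - 1;
            uint32_t oneShift = (uint32_t)((x >> (c1 + c2)) & mask);
            assert(twoShifts == oneShift);
          }
    }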
@@ -8585,6 +8766,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
+ // Try transforming N to an indexed store.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -8609,6 +8794,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+ // Try transforming N to an indexed load.
+ if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -9108,6 +9297,8 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
return SDValue();
+ assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
+
SDLoc DL(N);
const unsigned NumSplits =
DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
@@ -9125,8 +9316,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
- BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Stride, DL, BasePtr.getValueType()));
+ BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL);
Loads.push_back(SplitLoad.getValue(0));
Chains.push_back(SplitLoad.getValue(1));
@@ -9365,11 +9555,10 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
SDLoc dl(Ld);
SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
- SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(),
- Ld->getBasePtr(), Ld->getMask(),
- PassThru, Ld->getMemoryVT(),
- Ld->getMemOperand(), ExtLoadType,
- Ld->isExpandingLoad());
+ SDValue NewLoad = DAG.getMaskedLoad(
+ VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
+ PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
+ ExtLoadType, Ld->isExpandingLoad());
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
return NewLoad;
}
@@ -9397,10 +9586,15 @@ static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
// sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
// zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
SDLoc DL(N);
- SDValue NotX = DAG.getNOT(DL, X, VT);
- SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
- auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
- return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
+ unsigned ShCt = VT.getSizeInBits() - 1;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+ SDValue NotX = DAG.getNOT(DL, X, VT);
+ SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
+ auto ShiftOpcode =
+ N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
+ }
}
return SDValue();
}
@@ -9671,6 +9865,29 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
return (Known.Zero | 1).isAllOnesValue();
}
+/// Given an extending node with a pop-count operand, if the target does not
+/// support a pop-count in the narrow source type but does support it in the
+/// destination type, widen the pop-count to the destination type.
+static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
+ assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
+ Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
+
+ SDValue CtPop = Extend->getOperand(0);
+ if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
+ return SDValue();
+
+ EVT VT = Extend->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
+ !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
+ return SDValue();
+
+ // zext (ctpop X) --> ctpop (zext X)
+ SDLoc DL(Extend);
+ SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
+ return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
+}
+
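The rewrite is sound because zero extension introduces no set bits, so the population count is the same before and after widening. A quick check (using the GCC/Clang builtin; any popcount would do):

    #include <cassert>
    #include <cstdint>

    int main() {
      // zext (ctpop X) --> ctpop (zext X)
      for (uint32_t X = 0; X <= 0xFFFF; ++X) {
        uint16_t Narrow = (uint16_t)X;
        assert(__builtin_popcount(Narrow) ==          // ctpop, then widen
               __builtin_popcount((uint32_t)Narrow)); // widen, then ctpop
      }
    }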
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -9921,6 +10138,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
+ if (SDValue NewCtPop = widenCtPop(N, DAG))
+ return NewCtPop;
+
return SDValue();
}
@@ -10067,6 +10287,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SCC;
}
+ if (SDValue NewCtPop = widenCtPop(N, DAG))
+ return NewCtPop;
+
return SDValue();
}
@@ -10273,17 +10496,14 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (DAG.getDataLayout().isBigEndian())
ShAmt = AdjustBigEndianShift(ShAmt);
- EVT PtrType = N0.getOperand(1).getValueType();
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
SDLoc DL(LN0);
// The original load itself didn't wrap, so an offset within it doesn't.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
- SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
- PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, DL, PtrType),
- Flags);
+ SDValue NewPtr =
+ DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags);
AddToWorklist(NewPtr.getNode());
SDValue Load;
@@ -10735,16 +10955,16 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
SDValue VecSrc = N0.getOperand(0);
- EVT SrcVT = VecSrc.getValueType();
- if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
+ EVT VecSrcVT = VecSrc.getValueType();
+ if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
(!LegalOperations ||
- TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
SDLoc SL(N);
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
- unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
- VecSrc, DAG.getConstant(Idx, SL, IdxVT));
+ unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
+ DAG.getConstant(Idx, SL, IdxVT));
}
}
@@ -11299,11 +11519,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+ bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
@@ -11359,7 +11579,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -11373,7 +11594,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N10.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(0)),
@@ -11427,7 +11649,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
if (isContractableFMUL(N020) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N020.getValueType())) {
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
N1, Flags);
@@ -11456,7 +11679,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
N1, Flags);
@@ -11471,7 +11695,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
if (isContractableFMUL(N120) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N120.getValueType())) {
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
N0, Flags);
@@ -11489,7 +11714,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (N10.getOpcode() == PreferredFusedOpcode) {
SDValue N102 = N10.getOperand(2);
if (isContractableFMUL(N102) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N10.getValueType())) {
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
N0, Flags);
@@ -11510,11 +11736,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+ bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
@@ -11579,7 +11805,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -11595,7 +11822,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N10.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -11617,7 +11845,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N00.getOpcode() == ISD::FNEG) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -11640,7 +11869,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N00.getOpcode() == ISD::FP_EXTEND) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N000.getValueType())) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -11671,7 +11901,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
- isContractableFMUL(N1.getOperand(2))) {
+ isContractableFMUL(N1.getOperand(2)) &&
+ N1->hasOneUse()) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -11686,12 +11917,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode) {
+ if (N0.getOpcode() == PreferredFusedOpcode &&
+ N0->hasOneUse()) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
if (isContractableFMUL(N020) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N020.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -11716,7 +11949,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -11736,10 +11970,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
if (N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
+ N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableFMUL(N120) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -11768,7 +12004,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N101 = CvtSrc.getOperand(1);
SDValue N102 = CvtSrc.getOperand(2);
if (isContractableFMUL(N102) &&
- TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
+ TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
+ CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -11812,7 +12049,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
(Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// Floating-point multiply-add with intermediate rounding. This can result
@@ -12402,6 +12639,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
}
}
+ // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
+ // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
+ if (!TLI.isFNegFree(VT) &&
+ TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations,
+ ForCodeSize) == 2)
+ return DAG.getNode(ISD::FNEG, DL, VT,
+ TLI.getNegatedExpression(SDValue(N, 0), DAG,
+ LegalOperations, ForCodeSize),
+ Flags);
return SDValue();
}
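The new fneg-of-fma fold is exact in IEEE-754 arithmetic: negation just flips the sign bit and round-to-nearest is symmetric under negation, so (-X)*Y + (-Z) rounds to exactly -(X*Y + Z). Demonstrated with the library fma (standalone; not the DAG code path):

    #include <cassert>
    #include <cmath>

    int main() {
      // fma (fneg X), Y, (fneg Z) == fneg (fma X, Y, Z), so when negation
      // isn't free the combiner can hoist a single fneg out of the fma.
      double X = 1.1, Y = -2.3, Z = 4.2;
      assert(std::fma(-X, Y, -Z) == -std::fma(X, Y, Z));
    }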
@@ -12738,7 +12984,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
// Assume that libcalls are the smallest code.
// TODO: This restriction should probably be lifted for vectors.
- if (DAG.getMachineFunction().getFunction().hasOptSize())
+ if (ForCodeSize)
return SDValue();
// pow(X, 0.25) --> sqrt(sqrt(X))
@@ -13135,6 +13381,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0.
+ // FIXME: This is duplicated in isNegatibleForFree, but isNegatibleForFree
+ // doesn't know it was called from a context with a nsz flag if the input
+ // fsub does not.
+ if (N0.getOpcode() == ISD::FSUB &&
+ (DAG.getTarget().Options.NoSignedZerosFPMath ||
+ N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
+ return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
+ N0.getOperand(0), N->getFlags());
+ }
+
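Why the nsz guard matters: with X == Y the two forms produce zeros of opposite sign, which compare equal but are observably different. A two-line demonstration:

    #include <cassert>
    #include <cmath>

    int main() {
      double X = 1.0, Y = 1.0;
      assert(std::signbit(-(X - Y))); // -(X-Y) is -0.0
      assert(!std::signbit(Y - X));   // (Y-X) is +0.0
    }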
// Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
if (!TLI.isFNegFree(VT) &&
@@ -13168,9 +13424,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (CFP1) {
APFloat CVal = CFP1->getValueAPF();
CVal.changeSign();
- if (Level >= AfterLegalizeDAG &&
- (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
- TLI.isOperationLegal(ISD::ConstantFP, VT)))
+ if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT)))
return DAG.getNode(
ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
@@ -13423,12 +13678,22 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
EVT VT;
unsigned AS;
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
VT = LD->getMemoryVT();
AS = LD->getAddressSpace();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
VT = ST->getMemoryVT();
@@ -13462,38 +13727,64 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
VT.getTypeForEVT(*DAG.getContext()), AS);
}
-/// Try turning a load/store into a pre-indexed load/store when the base
-/// pointer is an add or subtract and it has other uses besides the load/store.
-/// After the transformation, the new indexed load/store has effectively folded
-/// the add/subtract in and all of its other uses are redirected to the
-/// new load/store.
-bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
- if (Level < AfterLegalizeDAG)
- return false;
-
- bool isLoad = true;
- SDValue Ptr;
- EVT VT;
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
+ bool &IsLoad, bool &IsMasked, SDValue &Ptr,
+ const TargetLowering &TLI) {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
if (LD->isIndexed())
return false;
- VT = LD->getMemoryVT();
- if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
- !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+ EVT VT = LD->getMemoryVT();
+ if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
return false;
Ptr = LD->getBasePtr();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
if (ST->isIndexed())
return false;
- VT = ST->getMemoryVT();
- if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
- !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+ EVT VT = ST->getMemoryVT();
+ if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
return false;
Ptr = ST->getBasePtr();
- isLoad = false;
+ IsLoad = false;
+ } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
+ if (LD->isIndexed())
+ return false;
+ EVT VT = LD->getMemoryVT();
+ if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
+ !TLI.isIndexedMaskedLoadLegal(Dec, VT))
+ return false;
+ Ptr = LD->getBasePtr();
+ IsMasked = true;
+ } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
+ if (ST->isIndexed())
+ return false;
+ EVT VT = ST->getMemoryVT();
+ if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
+ !TLI.isIndexedMaskedStoreLegal(Dec, VT))
+ return false;
+ Ptr = ST->getBasePtr();
+ IsLoad = false;
+ IsMasked = true;
} else {
return false;
}
+ return true;
+}
+
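For readers less familiar with indexed addressing: a pre-indexed access updates the base pointer first and then loads/stores through the new address, while a post-indexed access uses the old address and updates it afterwards; the refactored helper merely extends the same matching to the masked load/store nodes. The C-level idioms being targeted (illustrative only):

    #include <cassert>

    int main() {
      int buf[4] = {10, 20, 30, 40};

      int *p = buf;
      int pre = *(p += 1); // pre-indexed: bump base, then load (20)

      int *q = buf;
      int post = *q++;     // post-indexed: load (10), then bump base

      assert(pre == 20 && post == 10 && p == q);
    }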
+/// Try turning a load/store into a pre-indexed load/store when the base
+/// pointer is an add or subtract and it has other uses besides the load/store.
+/// After the transformation, the new indexed load/store has effectively folded
+/// the add/subtract in and all of its other uses are redirected to the
+/// new load/store.
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+ if (Level < AfterLegalizeDAG)
+ return false;
+
+ bool IsLoad = true;
+ bool IsMasked = false;
+ SDValue Ptr;
+ if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
+ Ptr, TLI))
+ return false;
// If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
// out. There is no reason to make this a preinc/predec.
@@ -13535,8 +13826,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return false;
// Check #2.
- if (!isLoad) {
- SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (!IsLoad) {
+ SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
+ : cast<StoreSDNode>(N)->getValue();
// Would require a copy.
if (Val == BasePtr)
@@ -13612,18 +13904,26 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
return false;
SDValue Result;
- if (isLoad)
- Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
- BasePtr, Offset, AM);
- else
- Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
- BasePtr, Offset, AM);
+ if (!IsMasked) {
+ if (IsLoad)
+ Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
+ else
+ Result =
+ DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
+ } else {
+ if (IsLoad)
+ Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM);
+ else
+ Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM);
+ }
++PreIndexedNodes;
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
Result.getNode()->dump(&DAG); dbgs() << '\n');
WorklistRemover DeadNodes(*this);
- if (isLoad) {
+ if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
@@ -13677,7 +13977,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// We can now generate the new expression.
SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
- SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
+ SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
SDValue NewUse = DAG.getNode(Opcode,
DL,
@@ -13687,7 +13987,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
}
// Replace the uses of Ptr with uses of the updated base value.
- DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
+ DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
deleteAndRecombine(Ptr.getNode());
AddToWorklist(Result.getNode());
@@ -13702,29 +14002,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
if (Level < AfterLegalizeDAG)
return false;
- bool isLoad = true;
+ bool IsLoad = true;
+ bool IsMasked = false;
SDValue Ptr;
- EVT VT;
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- if (LD->isIndexed())
- return false;
- VT = LD->getMemoryVT();
- if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
- !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
- return false;
- Ptr = LD->getBasePtr();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- if (ST->isIndexed())
- return false;
- VT = ST->getMemoryVT();
- if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
- !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
- return false;
- Ptr = ST->getBasePtr();
- isLoad = false;
- } else {
+ if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked,
+ Ptr, TLI))
return false;
- }
if (Ptr.getNode()->hasOneUse())
return false;
@@ -13760,7 +14043,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
// If all the uses are load / store addresses, then don't do the
// transformation.
- if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+ if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
bool RealUse = false;
for (SDNode *UseUse : Use->uses()) {
if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
@@ -13786,18 +14069,24 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
Worklist.push_back(Op);
if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
!SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
- SDValue Result = isLoad
- ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
- BasePtr, Offset, AM)
- : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
- BasePtr, Offset, AM);
+ SDValue Result;
+ if (!IsMasked)
+ Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
+ Offset, AM)
+ : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM);
+ else
+ Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM)
+ : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
+ BasePtr, Offset, AM);
++PostIndexedNodes;
++NodesCombined;
LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
dbgs() << '\n');
WorklistRemover DeadNodes(*this);
- if (isLoad) {
+ if (IsLoad) {
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
} else {
@@ -13809,7 +14098,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
// Replace the uses of Use with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
- Result.getValue(isLoad ? 1 : 0));
+ Result.getValue(IsLoad ? 1 : 0));
deleteAndRecombine(Op);
return true;
}
@@ -13923,8 +14212,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// the stored value). With Offset=n (for n > 0) the loaded value starts at the
// n-th least significant byte of the stored value.
if (DAG.getDataLayout().isBigEndian())
- Offset = (STMemType.getStoreSizeInBits() -
- LDMemType.getStoreSizeInBits()) / 8 - Offset;
+ Offset = ((int64_t)STMemType.getStoreSizeInBits() -
+ (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset;
// Check that the stored value covers all bits that are loaded.
bool STCoversLD =
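
The new int64_t casts are significant because getStoreSizeInBits() yields an unsigned quantity; when the load is wider than the store, the old unsigned subtraction would wrap to a huge positive offset instead of going negative and failing the cover check below. A small self-contained sketch of the arithmetic:

#include <cstdint>
#include <cstdio>

int main() {
  // Big-endian: a 16-bit load of a 32-bit stored value at Offset=0 starts
  // at byte (32 - 16) / 8 - 0 = 2 of the stored value.
  int64_t StBits = 32, LdBits = 16, Offset = 0;
  printf("%lld\n", (long long)((StBits - LdBits) / 8 - Offset)); // 2

  // If the load were *wider* than the store, unsigned arithmetic would wrap:
  uint64_t Wrapped = ((uint64_t)16 - (uint64_t)32) / 8; // huge positive value
  int64_t Signed = ((int64_t)16 - (int64_t)32) / 8;     // -2, sanely negative
  printf("%llu %lld\n", (unsigned long long)Wrapped, (long long)Signed);
}
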
@@ -14066,7 +14355,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
return V;
// Try to infer better alignment information than the load already has.
- if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
SDValue NewLoad = DAG.getExtLoad(
@@ -14786,8 +15075,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue Ptr = St->getBasePtr();
if (StOffset) {
SDLoc DL(IVal);
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
- Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL);
NewAlign = MinAlign(NewAlign, StOffset);
}
@@ -14898,10 +15186,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
return SDValue();
- SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
- Ptr.getValueType(), Ptr,
- DAG.getConstant(PtrOff, SDLoc(LD),
- Ptr.getValueType()));
+ SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
SDValue NewLD =
DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
@@ -15081,7 +15366,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
// The latest Node in the DAG.
SDLoc DL(StoreNodes[0].MemNode);
- int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
+ TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
unsigned SizeInBits = NumStores * ElementSizeBits;
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
@@ -15466,7 +15751,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
Attribute::NoImplicitFloat);
// This function cannot currently deal with non-byte-sized memory sizes.
- if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
+ if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
return false;
if (!MemVT.isSimple())
@@ -16015,6 +16300,9 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
if (Value.getOpcode() == ISD::TargetConstantFP)
return SDValue();
+ if (!ISD::isNormalStore(ST))
+ return SDValue();
+
SDLoc DL(ST);
SDValue Chain = ST->getChain();
@@ -16075,8 +16363,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getAlignment(), MMOFlags, AAInfo);
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4, DL, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL);
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
@@ -16111,8 +16398,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
DAG, *ST->getMemOperand())) {
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
- ST->getPointerInfo(), ST->getAlignment(),
- ST->getMemOperand()->getFlags(), ST->getAAInfo());
+ ST->getMemOperand());
}
}
@@ -16121,7 +16407,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return Chain;
// Try to infer better alignment information than the store already has.
- if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
SDValue NewStore =
@@ -16451,9 +16737,7 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
// Lower value store.
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getAlignment(), MMOFlags, AAInfo);
- Ptr =
- DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL);
// Higher value store.
SDValue St1 =
DAG.getStore(St0, DL, Hi, Ptr,
@@ -16464,11 +16748,15 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
/// Convert a disguised subvector insertion into a shuffle:
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
+ assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
+ "Expected extract_vector_elt");
SDValue InsertVal = N->getOperand(1);
SDValue Vec = N->getOperand(0);
- // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
- // --> (vector_shuffle X, Y)
+ // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
+ // InsIndex)
+ // --> (vector_shuffle X, Y) and variations where shuffle operands may be
+ // CONCAT_VECTORS.
if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(InsertVal.getOperand(1))) {
@@ -16481,18 +16769,47 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
// Vec's operand 0 is using indices from 0 to N-1 and
// operand 1 from N to 2N - 1, where N is the number of
// elements in the vectors.
- int XOffset = -1;
- if (InsertVal.getOperand(0) == X) {
- XOffset = 0;
- } else if (InsertVal.getOperand(0) == Y) {
- XOffset = X.getValueType().getVectorNumElements();
+ SDValue InsertVal0 = InsertVal.getOperand(0);
+ int ElementOffset = -1;
+
+ // We explore the inputs of the shuffle in order to see if we find the
+ // source of the extract_vector_elt. If so, we can use it to modify the
+ // shuffle rather than perform an insert_vector_elt.
+ SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
+ ArgWorkList.emplace_back(Mask.size(), Y);
+ ArgWorkList.emplace_back(0, X);
+
+ while (!ArgWorkList.empty()) {
+ int ArgOffset;
+ SDValue ArgVal;
+ std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
+
+ if (ArgVal == InsertVal0) {
+ ElementOffset = ArgOffset;
+ break;
+ }
+
+ // Peek through concat_vectors.
+ if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
+ int CurrentArgOffset =
+ ArgOffset + ArgVal.getValueType().getVectorNumElements();
+ int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
+ for (SDValue Op : reverse(ArgVal->ops())) {
+ CurrentArgOffset -= Step;
+ ArgWorkList.emplace_back(CurrentArgOffset, Op);
+ }
+
+ // Make sure we walked through all the operands and did not miscompute
+ // the running offset.
+ assert(CurrentArgOffset == ArgOffset);
+ }
}
- if (XOffset != -1) {
+ if (ElementOffset != -1) {
SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
- NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
+ NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
assert(NewMask[InsIndex] <
(int)(2 * Vec.getValueType().getVectorNumElements()) &&
NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
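
The worklist above is easy to model in isolation: shuffle input X covers mask indices [0, N) and Y covers [N, 2N), and a concat_vectors input splits its range evenly across its operands, pushed in reverse so they pop in order. A stand-alone sketch (the Vec type and names are hypothetical stand-ins for SDValues):

#include <cstdio>
#include <cstring>
#include <utility>
#include <vector>

// Toy model: a vector value with a name; a non-empty ConcatOps means it is a
// concat_vectors of those operands.
struct Vec { int NumElts; std::vector<Vec> ConcatOps; const char *Name; };

// Mirror of the worklist above: find the mask offset at which Target appears.
int findElementOffset(const Vec &X, const Vec &Y, int MaskSize,
                      const char *Target) {
  std::vector<std::pair<int, const Vec *>> Work{{MaskSize, &Y}, {0, &X}};
  while (!Work.empty()) {
    auto [Off, V] = Work.back();
    Work.pop_back();
    if (std::strcmp(V->Name, Target) == 0)
      return Off;
    if (!V->ConcatOps.empty()) { // peek through concat_vectors, in reverse
      int Cur = Off + V->NumElts;
      for (auto It = V->ConcatOps.rbegin(); It != V->ConcatOps.rend(); ++It) {
        Cur -= It->NumElts;
        Work.push_back({Cur, &*It});
      }
    }
  }
  return -1;
}

int main() {
  // X = concat(A x4, B x4), Y = C x8; the shuffle mask has 8 lanes per input.
  Vec A{4, {}, "A"}, B{4, {}, "B"};
  Vec X{8, {A, B}, "X"}, Y{8, {}, "C"};
  printf("%d %d %d\n", findElementOffset(X, Y, 8, "A"),  // 0
         findElementOffset(X, Y, 8, "B"),                // 4
         findElementOffset(X, Y, 8, "C"));               // 8
}
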
@@ -16562,13 +16879,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue EltNo = N->getOperand(2);
SDLoc DL(N);
- // If the inserted element is an UNDEF, just use the input vector.
- if (InVal.isUndef())
- return InVec;
-
EVT VT = InVec.getValueType();
unsigned NumElts = VT.getVectorNumElements();
+ // Inserting into an out-of-bounds element yields undef.
+ if (auto *IndexC = dyn_cast<ConstantSDNode>(EltNo))
+ if (IndexC->getZExtValue() >= VT.getVectorNumElements())
+ return DAG.getUNDEF(VT);
+
// Remove redundant insertions:
// (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -16683,7 +17001,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
}
- NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
+ NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);
// The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load.
@@ -16723,8 +17041,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
AddToWorklist(EVE);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
- AddUsersToWorklist(Load.getNode()); // Add users too
- AddToWorklist(Load.getNode());
+ AddToWorklistWithUsers(Load.getNode());
++OpsNarrowed;
return SDValue(EVE, 0);
}
@@ -18239,22 +18556,61 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getBitcast(NVT, NewExtract);
}
}
- // TODO - handle (DestNumElts % SrcNumElts) == 0
+ if ((DestNumElts % SrcNumElts) == 0) {
+ unsigned DestSrcRatio = DestNumElts / SrcNumElts;
+ if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) {
+ unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio;
+ EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getScalarType(), NewExtNumElts);
+ if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+ unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio;
+ SDLoc DL(N);
+ SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+ SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
+ }
+ }
}
- // Combine:
- // (extract_subvec (concat V1, V2, ...), i)
- // Into:
- // Vi if possible
- // Only operand 0 is checked as 'concat' assumes all inputs of the same
- // type.
- if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
- V.getOperand(0).getValueType() == NVT) {
- unsigned Idx = N->getConstantOperandVal(1);
- unsigned NumElems = NVT.getVectorNumElements();
- assert((Idx % NumElems) == 0 &&
- "IDX in concat is not a multiple of the result vector length.");
- return V->getOperand(Idx / NumElems);
+ if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index)) {
+ EVT ConcatSrcVT = V.getOperand(0).getValueType();
+ assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "Concat and extract subvector do not change element type");
+
+ unsigned ExtIdx = N->getConstantOperandVal(1);
+ unsigned ExtNumElts = NVT.getVectorNumElements();
+ assert(ExtIdx % ExtNumElts == 0 &&
+ "Extract index is not a multiple of the input vector length.");
+
+ unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorNumElements();
+ unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
+
+ // If the concatenated source types match this extract, it's a direct
+ // simplification:
+ // extract_subvec (concat V1, V2, ...), i --> Vi
+ if (ConcatSrcNumElts == ExtNumElts)
+ return V.getOperand(ConcatOpIdx);
+
+ // If the length of each concatenated source vector is a multiple of the
+ // extracted length, then extract a fraction of one of those source vectors
+ // directly from a concat operand. Example:
+ //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
+ //   v2i8 extract_subvec v8i8 Y, 6
+ if (ConcatSrcNumElts % ExtNumElts == 0) {
+ SDLoc DL(N);
+ unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
+ assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
+ "Trying to extract from >1 concat operand?");
+ assert(NewExtIdx % ExtNumElts == 0 &&
+ "Extract index is not a multiple of the input vector length.");
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue NewIndexC = DAG.getConstant(NewExtIdx, DL, IdxTy);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
+ V.getOperand(ConcatOpIdx), NewIndexC);
+ }
}
V = peekThroughBitcasts(V);
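
The index arithmetic in the new branch reproduces the example in the comment above; this stand-alone snippet checks it:

#include <cstdio>

int main() {
  // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14
  unsigned ExtIdx = 14, ConcatSrcNumElts = 8;
  unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;             // 1, i.e. Y
  unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; // 6
  printf("operand %u, new index %u\n", ConcatOpIdx, NewExtIdx);
  // --> v2i8 extract_subvec (v8i8 Y), 6
}
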
@@ -18962,6 +19318,30 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return V;
}
+ // A shuffle of a concat of the same narrow vector can be reduced to use
+ // only low-half elements of a concat with undef:
+ // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
+ N0.getNumOperands() == 2 &&
+ N0.getOperand(0) == N0.getOperand(1)) {
+ int HalfNumElts = (int)NumElts / 2;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= HalfNumElts) {
+ assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
+ Idx -= HalfNumElts;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (TLI.isShuffleMaskLegal(NewMask, VT)) {
+ SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
+ SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+ N0.getOperand(0), UndefVec);
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
+ }
+ }
+
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
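
The mask rewrite for the concat(X, X) case can be verified independently: any lane selecting from the second, identical copy of X is redirected into the first copy, after which the second concat operand may be undef. A minimal sketch using -1 as the undef sentinel, as shuffle masks do:

#include <cstdio>
#include <vector>

int main() {
  // shuf (concat X, X), undef, Mask: indices into the high half (the second
  // copy of X) can be remapped into the low half by subtracting NumElts/2.
  std::vector<int> Mask = {0, 5, 2, 7, -1, 4, 1, 6}; // 8 lanes, X has 4 elts
  int HalfNumElts = (int)Mask.size() / 2;
  for (int &Idx : Mask)
    if (Idx >= HalfNumElts)
      Idx -= HalfNumElts;
  for (int Idx : Mask)
    printf("%d ", Idx); // 0 1 2 3 -1 0 1 2
  printf("\n");
}
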
@@ -19446,8 +19826,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
int EltIdx = i / Split;
int SubIdx = i % Split;
SDValue Elt = RHS.getOperand(EltIdx);
+ // X & undef --> 0 (not undef). So this lane must be converted to choose
+ // from the zero constant vector (same as if the element had all 0-bits).
if (Elt.isUndef()) {
- Indices.push_back(-1);
+ Indices.push_back(i + NumSubElts);
continue;
}
@@ -19460,14 +19842,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
return SDValue();
// Extract the sub element from the constant bit mask.
- if (DAG.getDataLayout().isBigEndian()) {
- Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
- } else {
- Bits.lshrInPlace(SubIdx * NumSubBits);
- }
-
- if (Split > 1)
- Bits = Bits.trunc(NumSubBits);
+ if (DAG.getDataLayout().isBigEndian())
+ Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
+ else
+ Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
if (Bits.isAllOnesValue())
Indices.push_back(i);
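
The new form is purely notational: APInt::extractBits(NumSubBits, Pos) grabs NumSubBits starting at bit Pos, which is exactly the shift-then-truncate the old code spelled out. Modeled on uint64_t (assuming sub-widths below 64, unlike the general APInt case):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Old form: logical shift right in place, then truncate to the sub-width.
uint64_t lshrThenTrunc(uint64_t Bits, unsigned NumSubBits, unsigned Shift) {
  return (Bits >> Shift) & ((1ULL << NumSubBits) - 1);
}

// New form: APInt::extractBits(NumSubBits, BitPos) does the same thing in a
// single step; modeled here on uint64_t.
uint64_t extractBits(uint64_t Bits, unsigned NumSubBits, unsigned BitPos) {
  return (Bits >> BitPos) & ((1ULL << NumSubBits) - 1);
}

int main() {
  uint64_t Bits = 0xFF00FF00FF00FF00ULL;
  for (unsigned SubIdx = 0; SubIdx != 4; ++SubIdx) {
    unsigned NumSubBits = 16, Pos = SubIdx * NumSubBits;
    assert(lshrThenTrunc(Bits, NumSubBits, Pos) ==
           extractBits(Bits, NumSubBits, Pos));
    printf("%#llx\n", (unsigned long long)extractBits(Bits, NumSubBits, Pos));
  }
}
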
@@ -19910,22 +20288,28 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
- SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
- SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
- AddToWorklist(Shift.getNode());
-
- if (XType.bitsGT(AType)) {
- Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
AddToWorklist(Shift.getNode());
- }
- if (CC == ISD::SETGT)
- Shift = DAG.getNOT(DL, Shift, AType);
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
- return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
}
- SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
+ unsigned ShCt = XType.getSizeInBits() - 1;
+ if (TLI.shouldAvoidTransformToShift(XType, ShCt))
+ return SDValue();
+
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
AddToWorklist(Shift.getNode());
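
For the power-of-two arm guarded above, the transform turns select (X < 0), C, 0 with C == 1 << L into a logical right shift that lands the sign bit on bit L, followed by a mask; the new shouldAvoidTransformToShift hook merely lets targets opt out. A stand-alone check of the arithmetic, assuming the setlt-zero shape and i32:

#include <cassert>
#include <cstdint>
#include <cstdio>

// select_cc (setlt X, 0), C, 0 with C = 1 << L becomes
//   (X >>logical (W - L - 1)) & C
// i.e. shift the sign bit down to bit L and mask everything else off.
uint32_t selectForm(int32_t X, uint32_t C) { return X < 0 ? C : 0; }

uint32_t shiftAndForm(int32_t X, uint32_t C, unsigned L) {
  unsigned ShCt = 32 - L - 1;       // XType.getSizeInBits() - logBase2(C) - 1
  return ((uint32_t)X >> ShCt) & C; // logical shift, then mask
}

int main() {
  uint32_t C = 8; // power of two, L = 3
  for (int32_t X : {-100, -1, 0, 1, 100}) {
    assert(selectForm(X, C) == shiftAndForm(X, C, 3));
    printf("X=%4d -> %u\n", X, shiftAndForm(X, C, 3));
  }
}
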
@@ -20035,31 +20419,29 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// when the condition can be materialized as an all-ones register. Any
// single bit-test can be materialized as an all-ones register with
// shift-left and shift-right-arith.
- // TODO: The operation legality checks could be loosened to include "custom",
- // but that may cause regressions for targets that do not have shift
- // instructions.
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
- N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) &&
- TLI.isOperationLegal(ISD::SHL, VT) &&
- TLI.isOperationLegal(ISD::SRA, VT)) {
+ N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
- SDValue ShlAmt =
- DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
- getShiftAmountTy(AndLHS.getValueType()));
- SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
-
- // Now arithmetic right shift it all the way over, so the result is either
- // all-ones, or zero.
- SDValue ShrAmt =
- DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
- getShiftAmountTy(Shl.getValueType()));
- SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
-
- return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ unsigned ShCt = AndMask.getBitWidth() - 1;
+ if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
+ getShiftAmountTy(AndLHS.getValueType()));
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is
+ // either all-ones, or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(ShCt, SDLoc(Shl),
+ getShiftAmountTy(Shl.getValueType()));
+ SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
}
}
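
The bit-test materialization above relies on a shl/sra pair producing an all-ones or all-zero word. A self-contained model of the sequence (i32; __builtin_clz is a GCC/Clang builtin standing in for AndMask.countLeadingZeros(), and the signed right shift is arithmetic on all mainstream targets):

#include <cassert>
#include <cstdint>
#include <cstdio>

// select_cc ((X & Mask) == 0), 0, N3 ==> ((X << clz(Mask)) >>arith 31) & N3:
// move the tested bit into the sign position, arithmetic-shift it across the
// whole word to get all-ones or zero, then AND with N3.
uint32_t selectForm(uint32_t X, uint32_t Mask, uint32_t N3) {
  return (X & Mask) == 0 ? 0 : N3;
}

uint32_t shiftForm(uint32_t X, uint32_t Mask, uint32_t N3) {
  unsigned Lz = __builtin_clz(Mask); // Mask has exactly one bit set
  int32_t Shl = (int32_t)(X << Lz);  // tested bit is now the sign bit
  int32_t Shr = Shl >> 31;           // all-ones if set, else zero
  return (uint32_t)Shr & N3;
}

int main() {
  uint32_t Mask = 0x10, N3 = 1234;
  for (uint32_t X : {0u, 0x10u, 0xFFu, 0xEFu}) {
    assert(selectForm(X, Mask, N3) == shiftForm(X, Mask, N3));
    printf("X=%#04x -> %u\n", X, shiftForm(X, Mask, N3));
  }
}
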
@@ -20073,7 +20455,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
(!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
if (Swap) {
- CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
+ CC = ISD::getSetCCInverse(CC, CmpOpVT);
std::swap(N2C, N3C);
}
@@ -20101,10 +20483,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (N2C->isOne())
return Temp;
+ unsigned ShCt = N2C->getAPIntValue().logBase2();
+ if (TLI.shouldAvoidTransformToShift(VT, ShCt))
+ return SDValue();
+
// shl setcc result by log2 n2c
return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(),
- SDLoc(Temp),
+ DAG.getConstant(ShCt, SDLoc(Temp),
getShiftAmountTy(Temp.getValueType())));
}
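
The tail of this function folds select Cond, C, 0 with a power-of-two C into shifting the i1 setcc result left by log2(C), now gated by the same target hook. A tiny stand-alone check:

#include <cassert>
#include <cstdio>

int main() {
  // select Cond, C, 0 with C = 1 << L is just (zext Cond) << L.
  unsigned C = 8, L = 3; // logBase2(8) == 3
  for (int A : {1, 5})
    for (int B : {1, 5}) {
      unsigned Sel = (A == B) ? C : 0;        // select form
      unsigned Shl = (unsigned)(A == B) << L; // setcc-then-shift form
      assert(Sel == Shl);
      printf("A=%d B=%d -> %u\n", A, B, Shl);
    }
}
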
@@ -20237,7 +20622,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
/// Result = N X_i + X_i (N - N A X_i)
SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
SDNodeFlags Flags) {
- if (Level >= AfterLegalizeDAG)
+ if (LegalDAG)
return SDValue();
// TODO: Handle half and/or extended types?
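
The doc comment's refinement formula is ordinary Newton-Raphson on the reciprocal: each step roughly squares the relative error. A self-contained float demonstration of both the iteration and the final fused step quoted above:

#include <cstdio>

int main() {
  // N / A via a reciprocal estimate X0 refined by Newton-Raphson:
  //   X_{i+1} = X_i * (2 - A * X_i)
  // and the final fused step from the comment above:
  //   Result  = N * X_i + X_i * (N - N * A * X_i)
  float N = 3.0f, A = 7.0f;
  float X = 0.1f; // crude initial estimate of 1/7
  for (int i = 0; i < 3; ++i)
    X = X * (2.0f - A * X); // each step roughly doubles the correct bits
  float Result = N * X + X * (N - N * A * X);
  printf("refined: %.9f  exact: %.9f\n", Result, N / A);
}
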
@@ -20376,7 +20761,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
/// Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
bool Reciprocal) {
- if (Level >= AfterLegalizeDAG)
+ if (LegalDAG)
return SDValue();
// TODO: Handle half and/or extended types?
@@ -20411,9 +20796,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
SDLoc DL(Op);
EVT CCVT = getSetCCResultType(VT);
ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
- const Function &F = DAG.getMachineFunction().getFunction();
- Attribute Denorms = F.getFnAttribute("denormal-fp-math");
- if (Denorms.getValueAsString().equals("ieee")) {
+ DenormalMode DenormMode = DAG.getDenormalMode(VT);
+ if (DenormMode == DenormalMode::IEEE) {
// fabs(X) < SmallestNormal ? 0.0 : Est
const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 6d7260d7aee5..2bec8613e79c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -410,8 +410,8 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
else if (isa<ConstantPointerNull>(V))
// Translate this as an integer zero so that it can be
// local-CSE'd with actual integer zeros.
- Reg = getRegForValue(
- Constant::getNullValue(DL.getIntPtrType(V->getContext())));
+ Reg =
+ getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getType())));
else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
if (CF->isNullValue())
Reg = fastMaterializeFloatZero(CF);
@@ -1190,6 +1190,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
Flags.setSwiftSelf();
if (Arg.IsSwiftError)
Flags.setSwiftError();
+ if (Arg.IsCFGuardTarget)
+ Flags.setCFGuardTarget();
if (Arg.IsByVal)
Flags.setByVal();
if (Arg.IsInAlloca) {
@@ -1236,10 +1238,9 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
// Set labels for heapallocsite call.
- if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) {
- const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
- MF->addCodeViewHeapAllocSite(CLI.Call, MD);
- }
+ if (CLI.CS)
+ if (MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"))
+ CLI.Call->setHeapAllocMarker(*MF, MD);
return true;
}
@@ -1275,6 +1276,10 @@ bool FastISel::lowerCall(const CallInst *CI) {
bool IsTailCall = CI->isTailCall();
if (IsTailCall && !isInTailCallPosition(CS, TM))
IsTailCall = false;
+ if (IsTailCall && MF->getFunction()
+ .getFnAttribute("disable-tail-calls")
+ .getValueAsString() == "true")
+ IsTailCall = false;
CallLoweringInfo CLI;
CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS)
@@ -1926,7 +1931,8 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
TII(*MF->getSubtarget().getInstrInfo()),
TLI(*MF->getSubtarget().getTargetLowering()),
TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
- SkipTargetIndependentISel(SkipTargetIndependentISel) {}
+ SkipTargetIndependentISel(SkipTargetIndependentISel),
+ LastLocalValue(nullptr), EmitStartPt(nullptr) {}
FastISel::~FastISel() = default;
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index cf6711adad48..fa33400cd4b3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -144,7 +144,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (AI->isStaticAlloca() &&
(TFI->isStackRealignable() || (Align <= StackAlign))) {
const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
- uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty);
+ uint64_t TySize =
+ MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize();
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
@@ -159,6 +160,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI);
}
+ // Scalable vectors may need a special StackID to distinguish
+ // them from other (fixed size) stack objects.
+ if (Ty->isVectorTy() && Ty->getVectorIsScalable())
+ MF->getFrameInfo().setStackID(FrameIndex,
+ TFI->getStackIDForScalableVectors());
+
StaticAllocaMap[AI] = FrameIndex;
// Update the catch handler information.
if (Iter != CatchObjects.end()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c5095995ec2e..c613c2540628 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -882,8 +882,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Flags.hasExact())
MI->setFlag(MachineInstr::MIFlag::IsExact);
- if (Flags.hasFPExcept())
- MI->setFlag(MachineInstr::MIFlag::FPExcept);
+ if (Flags.hasNoFPExcept())
+ MI->setFlag(MachineInstr::MIFlag::NoFPExcept);
}
// Emit all of the actual operands of this instruction, adding them to the
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f9fdf525240f..80ac8b95e4ef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -134,26 +134,27 @@ private:
ArrayRef<int> Mask) const;
bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
- bool &NeedInvert, const SDLoc &dl);
+ bool &NeedInvert, const SDLoc &dl, SDValue &Chain,
+ bool IsSignaling = false);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
- std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
- SDNode *Node, bool isSigned);
- SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
- RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
- RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128);
+ void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results);
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
- SDValue ExpandArgFPLibCall(SDNode *Node,
- RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
- RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128);
+ void ExpandArgFPLibCall(SDNode *Node,
+ RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results);
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -172,12 +173,11 @@ private:
SDValue NewIntValue) const;
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
- SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT,
- const SDLoc &dl);
- SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
- const SDLoc &dl);
- SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
- const SDLoc &dl);
+ SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
+ void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
+ SmallVectorImpl<SDValue> &Results);
+ void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
+ SmallVectorImpl<SDValue> &Results);
SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl);
SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl);
@@ -421,6 +421,9 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
}
SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ if (!ISD::isNormalStore(ST))
+ return SDValue();
+
LLVM_DEBUG(dbgs() << "Optimizing float store operations\n");
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
// FIXME: We shouldn't do this for TargetConstantFP's.
@@ -466,8 +469,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment,
MMOFlags, AAInfo);
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(4, dl, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl);
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
ST->getPointerInfo().getWithOffset(4),
MinAlign(Alignment, 4U), MMOFlags, AAInfo);
@@ -577,9 +579,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl,
- Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Hi = DAG.getNode(
ISD::SRL, dl, Value.getValueType(), Value,
DAG.getConstant(RoundWidth, dl,
@@ -793,9 +793,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl,
- Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
@@ -824,9 +822,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// Load the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl,
- Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags,
@@ -1013,6 +1009,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ // These pseudo-ops are the same as the other STRICT_ ops except
+ // they are registered with setOperationAction() using the input type
+ // instead of the output type.
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType());
+ break;
case ISD::SIGN_EXTEND_INREG: {
EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
@@ -1023,11 +1031,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOperand(2).getValueType());
break;
case ISD::SELECT_CC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
case ISD::SETCC:
case ISD::BR_CC: {
unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 :
+ Node->getOpcode() == ISD::STRICT_FSETCC ? 3 :
+ Node->getOpcode() == ISD::STRICT_FSETCCS ? 3 :
Node->getOpcode() == ISD::SETCC ? 2 : 1;
- unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0;
+ unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 :
+ Node->getOpcode() == ISD::STRICT_FSETCC ? 1 :
+ Node->getOpcode() == ISD::STRICT_FSETCCS ? 1 : 0;
MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType();
ISD::CondCode CCCode =
cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get();
@@ -1105,16 +1119,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
}
break;
- case ISD::STRICT_LRINT:
- case ISD::STRICT_LLRINT:
- case ISD::STRICT_LROUND:
- case ISD::STRICT_LLROUND:
- // These pseudo-ops are the same as the other STRICT_ ops except
- // they are registered with setOperationAction() using the input type
- // instead of the output type.
- Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
- Node->getOperand(1).getValueType());
- break;
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -1125,7 +1129,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
- case ISD::UMULFIXSAT: {
+ case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -1408,7 +1414,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
unsigned Offset = TypeByteSize*i;
SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType());
- Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+ Idx = DAG.getMemBasePlusOffset(FIPtr, Idx, dl);
// If the destination vector element type is narrower than the source
// element type, only store the bits necessary.
@@ -1471,8 +1477,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
} else {
// Advance the pointer so that the loaded byte will contain the sign bit.
unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1;
- IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(ByteOffset, DL, StackPtr.getValueType()));
+ IntPtr = DAG.getMemBasePlusOffset(StackPtr, ByteOffset, DL);
State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI,
ByteOffset);
}
@@ -1629,10 +1634,9 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
/// of a true/false result.
///
/// \returns true if the SetCC has been legalized, false if it hasn't.
-bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
- SDValue &RHS, SDValue &CC,
- bool &NeedInvert,
- const SDLoc &dl) {
+bool SelectionDAGLegalize::LegalizeSetCCCondCode(
+ EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert,
+ const SDLoc &dl, SDValue &Chain, bool IsSignaling) {
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
@@ -1650,7 +1654,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
}
// Swapping operands didn't work. Try inverting the condition.
bool NeedSwap = false;
- InvCC = getSetCCInverse(CCCode, OpVT.isInteger());
+ InvCC = getSetCCInverse(CCCode, OpVT);
if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
// If inverting the condition is not enough, try swapping operands
// on top of it.
@@ -1715,13 +1719,16 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
// If we aren't the ordered or unorder operation,
// then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
- SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
} else {
// Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
- SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1);
- SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2);
+ SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
+ SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
}
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
+ SetCC2.getValue(1));
LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
RHS = SDValue();
CC = SDValue();
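
One concrete instance of the (LHS CC1 RHS) Opc (LHS CC2 RHS) split: an illegal ordered-not-equal (SETONE) becomes SETO AND SETUNE. A stand-alone check in plain C++ (where != already has unordered-not-equal semantics):

#include <cassert>
#include <cmath>
#include <cstdio>

// "Ordered not-equal" (SETONE) split into two simpler comparisons joined by
// AND: x ONE y == (x O y) && (x UNE y), where O means "neither operand is
// NaN" and UNE means "unordered or not equal".
bool setone(double X, double Y) {
  return !std::isnan(X) && !std::isnan(Y) && X != Y;
}

bool expanded(double X, double Y) {
  bool Ordered = !std::isnan(X) && !std::isnan(Y); // SETO:   LHS CC1 RHS
  bool UnordNE = X != Y;                           // SETUNE: LHS CC2 RHS
  return Ordered && UnordNE;                       // Opc = AND
}

int main() {
  double Vals[] = {1.0, 2.0, NAN};
  for (double X : Vals)
    for (double Y : Vals) {
      assert(setone(X, Y) == expanded(X, Y));
      printf("%g ONE %g -> %d\n", X, Y, expanded(X, Y));
    }
}
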
@@ -2077,52 +2084,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
return CallInfo.first;
}
-// Expand a node into a call to a libcall. Similar to
-// ExpandLibCall except that the first operand is the in-chain.
-std::pair<SDValue, SDValue>
-SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
- SDNode *Node,
- bool isSigned) {
- SDValue InChain = Node->getOperand(0);
-
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
- EVT ArgVT = Node->getOperand(i).getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Node->getOperand(i);
- Entry.Ty = ArgTy;
- Entry.IsSExt = isSigned;
- Entry.IsZExt = !isSigned;
- Args.push_back(Entry);
- }
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy(DAG.getDataLayout()));
-
- Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node))
- .setChain(InChain)
- .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
- std::move(Args))
- .setSExtResult(isSigned)
- .setZExtResult(!isSigned);
-
- std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
- return CallInfo;
-}
-
-SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
- RTLIB::Libcall Call_F32,
- RTLIB::Libcall Call_F64,
- RTLIB::Libcall Call_F80,
- RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128) {
- if (Node->isStrictFPOpcode())
- Node = DAG.mutateStrictFPToFP(Node);
-
+void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results) {
RTLIB::Libcall LC;
switch (Node->getSimpleValueType(0).SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
@@ -2132,7 +2100,22 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
case MVT::f128: LC = Call_F128; break;
case MVT::ppcf128: LC = Call_PPCF128; break;
}
- return ExpandLibCall(LC, Node, false);
+
+ if (Node->isStrictFPOpcode()) {
+ EVT RetVT = Node->getValueType(0);
+ SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
+ TargetLowering::MakeLibCallOptions CallOptions;
+ // FIXME: This doesn't support tail calls.
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
+ Ops, CallOptions,
+ SDLoc(Node),
+ Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ } else {
+ SDValue Tmp = ExpandLibCall(LC, Node, false);
+ Results.push_back(Tmp);
+ }
}
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
@@ -2155,17 +2138,17 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
/// Expand the node to a libcall based on first argument type (for instance
/// lround and its variant).
-SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
- RTLIB::Libcall Call_F32,
- RTLIB::Libcall Call_F64,
- RTLIB::Libcall Call_F80,
- RTLIB::Libcall Call_F128,
- RTLIB::Libcall Call_PPCF128) {
- if (Node->isStrictFPOpcode())
- Node = DAG.mutateStrictFPToFP(Node);
+void SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT InVT = Node->getOperand(Node->isStrictFPOpcode() ? 1 : 0).getValueType();
RTLIB::Libcall LC;
- switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) {
+ switch (InVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
@@ -2174,7 +2157,21 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
case MVT::ppcf128: LC = Call_PPCF128; break;
}
- return ExpandLibCall(LC, Node, false);
+ if (Node->isStrictFPOpcode()) {
+ EVT RetVT = Node->getValueType(0);
+ SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
+ TargetLowering::MakeLibCallOptions CallOptions;
+ // FIXME: This doesn't support tail calls.
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
+ Ops, CallOptions,
+ SDLoc(Node),
+ Node->getOperand(0));
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ } else {
+ SDValue Tmp = ExpandLibCall(LC, Node, false);
+ Results.push_back(Tmp);
+ }
}
/// Issue libcalls to __{u}divmod to compute div / rem pairs.
@@ -2344,9 +2341,14 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
/// legal for the target.
-SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
- EVT DestVT,
- const SDLoc &dl) {
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
+ SDValue &Chain) {
+ bool isSigned = (Node->getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Node->getOpcode() == ISD::SINT_TO_FP);
+ EVT DestVT = Node->getValueType(0);
+ SDLoc dl(Node);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Op0 = Node->getOperand(OpNo);
EVT SrcVT = Op0.getValueType();
// TODO: Should any fast-math-flags be set for the created nodes?
@@ -2393,16 +2395,39 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
BitsToDouble(0x4330000080000000ULL) :
BitsToDouble(0x4330000000000000ULL),
dl, MVT::f64);
- // subtract the bias
- SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
- // final result
- SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
+ // Subtract the bias and get the final result.
+ SDValue Sub;
+ SDValue Result;
+ if (Node->isStrictFPOpcode()) {
+ Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
+ {Node->getOperand(0), Load, Bias});
+ Chain = Sub.getValue(1);
+ if (DestVT != Sub.getValueType()) {
+ std::pair<SDValue, SDValue> ResultPair;
+ ResultPair =
+ DAG.getStrictFPExtendOrRound(Sub, Chain, dl, DestVT);
+ Result = ResultPair.first;
+ Chain = ResultPair.second;
+ } else
+ Result = Sub;
+ } else {
+ Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+ Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
+ }
return Result;
}
assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
// Code below here assumes !isSigned without checking again.
+ // FIXME: This can produce slightly incorrect results. See details in
+ // https://reviews.llvm.org/D69275
- SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+ SDValue Tmp1;
+ if (Node->isStrictFPOpcode()) {
+ Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other },
+ { Node->getOperand(0), Op0 });
+ } else
+ Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
@@ -2448,6 +2473,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
FudgeInReg = Handle.getValue();
}
+ if (Node->isStrictFPOpcode()) {
+ SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other },
+ { Tmp1.getValue(1), Tmp1, FudgeInReg });
+ Chain = Result.getValue(1);
+ return Result;
+ }
+
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
}
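
The f64 path above is the classic 2^52 bias trick: the 32-bit integer is stored into the low word of a double whose bit pattern is 0x4330000000000000 (the bits of 2^52), so the value read back is exactly 2^52 + x, and the FSUB removes the bias; the 0x4330000080000000 constant plays the analogous role for the signed variant. A stand-alone demonstration:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // uint32 -> double without an integer-convert instruction: build the bit
  // pattern 0x43300000'xxxxxxxx (the double 2^52 + x, since x < 2^32 fits
  // entirely in the low mantissa bits), then subtract the 2^52 bias.
  uint32_t X = 0xDEADBEEFu;
  uint64_t BiasBits = 0x4330000000000000ULL; // bits of 2^52
  uint64_t LoadedBits = BiasBits | X;        // hi word = bias, lo word = x
  double Loaded, Bias;
  std::memcpy(&Loaded, &LoadedBits, sizeof(double));
  std::memcpy(&Bias, &BiasBits, sizeof(double));
  double Result = Loaded - Bias;            // the FSUB above
  printf("%.1f %.1f\n", Result, (double)X); // identical values
}
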
@@ -2456,9 +2488,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
/// operation that takes a larger input.
-SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
- bool isSigned,
- const SDLoc &dl) {
+void SelectionDAGLegalize::PromoteLegalINT_TO_FP(
+ SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) {
+ bool IsStrict = N->isStrictFPOpcode();
+ bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
+ EVT DestVT = N->getValueType(0);
+ SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
+ unsigned UIntOp = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP;
+ unsigned SIntOp = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP;
+
// First step, figure out the appropriate *INT_TO_FP operation to use.
EVT NewInTy = LegalOp.getValueType();
@@ -2470,15 +2509,16 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
assert(NewInTy.isInteger() && "Ran out of possibilities!");
// If the target supports SINT_TO_FP of this type, use it.
- if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
- OpToUse = ISD::SINT_TO_FP;
+ if (TLI.isOperationLegalOrCustom(SIntOp, NewInTy)) {
+ OpToUse = SIntOp;
break;
}
- if (isSigned) continue;
+ if (IsSigned)
+ continue;
// If the target supports UINT_TO_FP of this type, use it.
- if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
- OpToUse = ISD::UINT_TO_FP;
+ if (TLI.isOperationLegalOrCustom(UIntOp, NewInTy)) {
+ OpToUse = UIntOp;
break;
}
@@ -2487,9 +2527,21 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
// Okay, we found the operation and type to use. Zero extend our input to the
// desired type then run the operation on it.
- return DAG.getNode(OpToUse, dl, DestVT,
- DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
- dl, NewInTy, LegalOp));
+ if (IsStrict) {
+ SDValue Res =
+ DAG.getNode(OpToUse, dl, {DestVT, MVT::Other},
+ {N->getOperand(0),
+ DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp)});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ return;
+ }
+
+ Results.push_back(
+ DAG.getNode(OpToUse, dl, DestVT,
+ DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+ dl, NewInTy, LegalOp)));
}
/// This function is responsible for legalizing a
@@ -2497,9 +2549,13 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
/// we promote it. At this point, we know that the result and operand types are
/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
/// operation that returns a larger result.
-SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
- bool isSigned,
- const SDLoc &dl) {
+void SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
+ SmallVectorImpl<SDValue> &Results) {
+ bool IsStrict = N->isStrictFPOpcode();
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_SINT;
+ EVT DestVT = N->getValueType(0);
+ SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
// First step, figure out the appropriate FP_TO*INT operation to use.
EVT NewOutTy = DestVT;
@@ -2512,26 +2568,32 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
// A larger signed type can hold all unsigned values of the requested type,
// so using FP_TO_SINT is valid
- if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
- OpToUse = ISD::FP_TO_SINT;
+ OpToUse = IsStrict ? ISD::STRICT_FP_TO_SINT : ISD::FP_TO_SINT;
+ if (TLI.isOperationLegalOrCustom(OpToUse, NewOutTy))
break;
- }
// However, if the value may be < 0.0, we *must* use some FP_TO_SINT.
- if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
- OpToUse = ISD::FP_TO_UINT;
+ OpToUse = IsStrict ? ISD::STRICT_FP_TO_UINT : ISD::FP_TO_UINT;
+ if (!IsSigned && TLI.isOperationLegalOrCustom(OpToUse, NewOutTy))
break;
- }
// Otherwise, try a larger type.
}
// Okay, we found the operation and type to use.
- SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+ SDValue Operation;
+ if (IsStrict) {
+ SDVTList VTs = DAG.getVTList(NewOutTy, MVT::Other);
+ Operation = DAG.getNode(OpToUse, dl, VTs, N->getOperand(0), LegalOp);
+ } else
+ Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
// Truncate the result of the extended FP_TO_*INT operation to the desired
// size.
- return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+ Results.push_back(Trunc);
+ if (IsStrict)
+ Results.push_back(Operation.getValue(1));
}
/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts.
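
The promotion is the usual widen-then-truncate trick; the comment above about a larger signed type holding all unsigned values of the narrow type is visible in this C-level analogue:

#include <cstdio>

int main() {
  // Promoting FP_TO_UINT for a narrow type: convert to a wider signed
  // integer first (which holds every value of the narrow unsigned type),
  // then truncate to the requested width.
  float F = 200.0f;
  unsigned char Narrow = (unsigned char)(int)F; // FP_TO_SINT i32 + TRUNCATE
  printf("%u\n", Narrow); // 200; a direct signed convert to i8 would overflow
}
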
@@ -2812,12 +2874,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::STRICT_FP_ROUND:
- // This expansion does not honor the "strict" properties anyway,
- // so prefer falling back to the non-strict operation if legal.
+ // When strict mode is enforced, we can't do the expansion because it
+ // does not honor the "strict" properties; only a libcall is allowed.
+ if (TLI.isStrictFPEnabled())
+ break;
+ // We might as well mutate to FP_ROUND when the FP_ROUND operation is
+ // legal, since it is more efficient than a stack conversion.
if (TLI.getStrictFPOperationAction(Node->getOpcode(),
Node->getValueType(0))
== TargetLowering::Legal)
break;
+ // We fall back to a stack conversion when the FP_ROUND operation
+ // isn't available.
Tmp1 = EmitStackConvert(Node->getOperand(1),
Node->getValueType(0),
Node->getValueType(0), dl, Node->getOperand(0));
@@ -2832,12 +2900,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
case ISD::STRICT_FP_EXTEND:
- // This expansion does not honor the "strict" properties anyway,
- // so prefer falling back to the non-strict operation if legal.
+ // When strict mode is enforced, we can't do the expansion because it
+ // does not honor the "strict" properties; only a libcall is allowed.
+ if (TLI.isStrictFPEnabled())
+ break;
+ // We might as well mutate to FP_EXTEND when the FP_EXTEND operation is
+ // legal, since it is more efficient than a stack conversion.
if (TLI.getStrictFPOperationAction(Node->getOpcode(),
Node->getValueType(0))
== TargetLowering::Legal)
break;
+ // We fall back to a stack conversion when the FP_EXTEND operation
+ // isn't available.
Tmp1 = EmitStackConvert(Node->getOperand(1),
Node->getOperand(1).getValueType(),
Node->getValueType(0), dl, Node->getOperand(0));
@@ -2883,15 +2957,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::UINT_TO_FP:
- if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) {
+ case ISD::STRICT_UINT_TO_FP:
+ if (TLI.expandUINT_TO_FP(Node, Tmp1, Tmp2, DAG)) {
Results.push_back(Tmp1);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Tmp2);
break;
}
LLVM_FALLTHROUGH;
case ISD::SINT_TO_FP:
- Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
- Node->getOperand(0), Node->getValueType(0), dl);
+ case ISD::STRICT_SINT_TO_FP:
+ Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2);
Results.push_back(Tmp1);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Tmp2);
break;
case ISD::FP_TO_SINT:
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
@@ -3340,6 +3419,24 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::UMULFIXSAT:
Results.push_back(TLI.expandFixedPointMul(Node, DAG));
break;
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX:
+ if (SDValue V = TLI.expandFixedPointDiv(Node->getOpcode(), SDLoc(Node),
+ Node->getOperand(0),
+ Node->getOperand(1),
+ Node->getConstantOperandVal(2),
+ DAG)) {
+ Results.push_back(V);
+ break;
+ }
+ // FIXME: We might want to retry here with a wider type if we fail,
+ // provided that type is legal.
+ // FIXME: Technically, so long as we only have sdivfixes where BW+Scale is
+ // <= 128 (which is the case for all of the default Embedded-C types),
+ // we will only get here with types and scales that we could always expand
+ // if we were allowed to generate libcalls to division functions of illegal
+ // type. But we cannot do that.
+ llvm_unreachable("Cannot expand DIVFIX!");
case ISD::ADDCARRY:
case ISD::SUBCARRY: {
SDValue LHS = Node->getOperand(0);
@@ -3503,12 +3600,19 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
Results.push_back(Tmp1);
break;
- case ISD::SETCC: {
- Tmp1 = Node->getOperand(0);
- Tmp2 = Node->getOperand(1);
- Tmp3 = Node->getOperand(2);
- bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2,
- Tmp3, NeedInvert, dl);
+ case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ bool IsStrict = Node->getOpcode() != ISD::SETCC;
+ bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ unsigned Offset = IsStrict ? 1 : 0;
+ Tmp1 = Node->getOperand(0 + Offset);
+ Tmp2 = Node->getOperand(1 + Offset);
+ Tmp3 = Node->getOperand(2 + Offset);
+ bool Legalized =
+ LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3,
+ NeedInvert, dl, Chain, IsSignaling);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
@@ -3523,9 +3627,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0));
Results.push_back(Tmp1);
+ if (IsStrict)
+ Results.push_back(Chain);
+
break;
}
+ // FIXME: It seems Legalized is false iff CCCode is Legal. It is unclear
+ // whether the code below is ever useful for strict nodes.
+ assert(!IsStrict && "Don't know how to expand for strict nodes.");
+
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
EVT VT = Node->getValueType(0);
@@ -3548,11 +3659,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::SELECT_CC: {
+ // TODO: need to add STRICT_SELECT_CC and STRICT_SELECT_CCS
Tmp1 = Node->getOperand(0); // LHS
Tmp2 = Node->getOperand(1); // RHS
Tmp3 = Node->getOperand(2); // True
Tmp4 = Node->getOperand(3); // False
EVT VT = Node->getValueType(0);
+ SDValue Chain;
SDValue CC = Node->getOperand(4);
ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
@@ -3574,8 +3687,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Try to legalize by inverting the condition. This is for targets that
// might support an ordered version of a condition, but not the unordered
// version (or vice versa).
- ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp,
- Tmp1.getValueType().isInteger());
+ ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType());
if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) {
// Use the new condition code and swap true and false
Legalized = true;
@@ -3595,9 +3707,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
if (!Legalized) {
- Legalized = LegalizeSetCCCondCode(
- getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert,
- dl);
+ Legalized = LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()),
+ Tmp1, Tmp2, CC, NeedInvert, dl, Chain);
assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
@@ -3623,13 +3734,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::BR_CC: {
+ // TODO: need to add STRICT_BR_CC and STRICT_BR_CCS
+ SDValue Chain;
Tmp1 = Node->getOperand(0); // Chain
Tmp2 = Node->getOperand(2); // LHS
Tmp3 = Node->getOperand(3); // RHS
Tmp4 = Node->getOperand(1); // CC
- bool Legalized = LegalizeSetCCCondCode(getSetCCResultType(
- Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl);
+ bool Legalized =
+ LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), Tmp2,
+ Tmp3, Tmp4, NeedInvert, dl, Chain);
(void)Legalized;
assert(Legalized && "Can't legalize BR_CC with legal condition!");
@@ -3677,7 +3791,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars);
- ReplaceNode(SDValue(Node, 0), Result);
+ Results.push_back(Result);
break;
}
case ISD::VECREDUCE_FADD:
@@ -3705,10 +3819,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID:
// FIXME: Custom lowering for these operations shouldn't return null!
- break;
+ // Return true so that we don't call ConvertNodeToLibcall, which would
+ // also do nothing here.
+ return true;
}
- if (Results.empty() && Node->isStrictFPOpcode()) {
+ if (!TLI.isStrictFPEnabled() && Results.empty() && Node->isStrictFPOpcode()) {
// FIXME: We were asked to expand a strict floating-point operation,
// but there is currently no expansion implemented that would preserve
// the "strict" properties. For now, we just fall back to the non-strict
@@ -3793,7 +3909,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
- std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false);
+ EVT RetVT = Node->getValueType(0);
+ SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
+ Ops, CallOptions,
+ SDLoc(Node),
+ Node->getOperand(0));
Results.push_back(Tmp.first);
Results.push_back(Tmp.second);
break;
@@ -3815,38 +3937,38 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
}
case ISD::FMINNUM:
case ISD::STRICT_FMINNUM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
- RTLIB::FMIN_F80, RTLIB::FMIN_F128,
- RTLIB::FMIN_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128, Results);
break;
case ISD::FMAXNUM:
case ISD::STRICT_FMAXNUM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
- RTLIB::FMAX_F80, RTLIB::FMAX_F128,
- RTLIB::FMAX_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128, Results);
break;
case ISD::FSQRT:
case ISD::STRICT_FSQRT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_F128,
- RTLIB::SQRT_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128, Results);
break;
case ISD::FCBRT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
- RTLIB::CBRT_F80, RTLIB::CBRT_F128,
- RTLIB::CBRT_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
+ RTLIB::CBRT_F80, RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128, Results);
break;
case ISD::FSIN:
case ISD::STRICT_FSIN:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_F128,
- RTLIB::SIN_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128, Results);
break;
case ISD::FCOS:
case ISD::STRICT_FCOS:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_F128,
- RTLIB::COS_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128, Results);
break;
case ISD::FSINCOS:
// Expand into sincos libcall.
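Aside (sketch): each Results.push_back(ExpandFPLibCall(...)) above becomes a call that fills Results itself, because a strict op must publish a chain as well as a value. A plausible shape for the new helper, reconstructed from its call sites rather than quoted from the patch:

    void SelectionDAGLegalize::ExpandFPLibCall(SDNode *Node,
        RTLIB::Libcall CallF32, RTLIB::Libcall CallF64, RTLIB::Libcall CallF80,
        RTLIB::Libcall CallF128, RTLIB::Libcall CallPPCF128,
        SmallVectorImpl<SDValue> &Results) {
      RTLIB::Libcall LC;
      switch (Node->getSimpleValueType(0).SimpleTy) {
      default: llvm_unreachable("Unexpected request for libcall!");
      case MVT::f32:     LC = CallF32;     break;
      case MVT::f64:     LC = CallF64;     break;
      case MVT::f80:     LC = CallF80;     break;
      case MVT::f128:    LC = CallF128;    break;
      case MVT::ppcf128: LC = CallPPCF128; break;
      }
      bool IsStrict = Node->isStrictFPOpcode();
      unsigned Offset = IsStrict ? 1 : 0;  // strict ops: operand 0 is the chain
      SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
      SmallVector<SDValue, 4> Ops(Node->op_begin() + Offset, Node->op_end());
      TargetLowering::MakeLibCallOptions CallOptions;
      std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(
          DAG, LC, Node->getValueType(0), Ops, CallOptions, SDLoc(Node), Chain);
      Results.push_back(Tmp.first);        // value result
      if (IsStrict)
        Results.push_back(Tmp.second);     // chain result
    }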
@@ -3855,181 +3977,204 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
case ISD::FLOG:
case ISD::STRICT_FLOG:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32,
- RTLIB::LOG_FINITE_F64,
- RTLIB::LOG_FINITE_F80,
- RTLIB::LOG_FINITE_F128,
- RTLIB::LOG_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32,
+ RTLIB::LOG_FINITE_F64,
+ RTLIB::LOG_FINITE_F80,
+ RTLIB::LOG_FINITE_F128,
+ RTLIB::LOG_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128, Results);
break;
case ISD::FLOG2:
case ISD::STRICT_FLOG2:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32,
- RTLIB::LOG2_FINITE_F64,
- RTLIB::LOG2_FINITE_F80,
- RTLIB::LOG2_FINITE_F128,
- RTLIB::LOG2_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32,
+ RTLIB::LOG2_FINITE_F64,
+ RTLIB::LOG2_FINITE_F80,
+ RTLIB::LOG2_FINITE_F128,
+ RTLIB::LOG2_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128, Results);
break;
case ISD::FLOG10:
case ISD::STRICT_FLOG10:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32,
- RTLIB::LOG10_FINITE_F64,
- RTLIB::LOG10_FINITE_F80,
- RTLIB::LOG10_FINITE_F128,
- RTLIB::LOG10_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32,
+ RTLIB::LOG10_FINITE_F64,
+ RTLIB::LOG10_FINITE_F80,
+ RTLIB::LOG10_FINITE_F128,
+ RTLIB::LOG10_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128, Results);
break;
case ISD::FEXP:
case ISD::STRICT_FEXP:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32,
- RTLIB::EXP_FINITE_F64,
- RTLIB::EXP_FINITE_F80,
- RTLIB::EXP_FINITE_F128,
- RTLIB::EXP_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32,
+ RTLIB::EXP_FINITE_F64,
+ RTLIB::EXP_FINITE_F80,
+ RTLIB::EXP_FINITE_F128,
+ RTLIB::EXP_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128, Results);
break;
case ISD::FEXP2:
case ISD::STRICT_FEXP2:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32,
- RTLIB::EXP2_FINITE_F64,
- RTLIB::EXP2_FINITE_F80,
- RTLIB::EXP2_FINITE_F128,
- RTLIB::EXP2_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32,
+ RTLIB::EXP2_FINITE_F64,
+ RTLIB::EXP2_FINITE_F80,
+ RTLIB::EXP2_FINITE_F128,
+ RTLIB::EXP2_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128, Results);
break;
case ISD::FTRUNC:
case ISD::STRICT_FTRUNC:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
- RTLIB::TRUNC_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128, Results);
break;
case ISD::FFLOOR:
case ISD::STRICT_FFLOOR:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
- RTLIB::FLOOR_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128, Results);
break;
case ISD::FCEIL:
case ISD::STRICT_FCEIL:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_F128,
- RTLIB::CEIL_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128, Results);
break;
case ISD::FRINT:
case ISD::STRICT_FRINT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_F128,
- RTLIB::RINT_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128, Results);
break;
case ISD::FNEARBYINT:
case ISD::STRICT_FNEARBYINT:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
- RTLIB::NEARBYINT_F64,
- RTLIB::NEARBYINT_F80,
- RTLIB::NEARBYINT_F128,
- RTLIB::NEARBYINT_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128, Results);
break;
case ISD::FROUND:
case ISD::STRICT_FROUND:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
- RTLIB::ROUND_F64,
- RTLIB::ROUND_F80,
- RTLIB::ROUND_F128,
- RTLIB::ROUND_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128, Results);
break;
case ISD::FPOWI:
- case ISD::STRICT_FPOWI:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128));
+ case ISD::STRICT_FPOWI: {
+ RTLIB::Libcall LC;
+ switch (Node->getSimpleValueType(0).SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = RTLIB::POWI_F32; break;
+ case MVT::f64: LC = RTLIB::POWI_F64; break;
+ case MVT::f80: LC = RTLIB::POWI_F80; break;
+ case MVT::f128: LC = RTLIB::POWI_F128; break;
+ case MVT::ppcf128: LC = RTLIB::POWI_PPCF128; break;
+ }
+ if (!TLI.getLibcallName(LC)) {
+ // Some targets don't have a powi libcall; use pow instead.
+ SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node),
+ Node->getValueType(0),
+ Node->getOperand(1));
+ Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node),
+ Node->getValueType(0), Node->getOperand(0),
+ Exponent));
+ break;
+ }
+ ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128, Results);
break;
+ }
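Aside: a standalone illustration of the fallback just above. When a target lacks a __powi-style routine, powi(x, n) is evaluated as pow(x, (double)n), which is exactly the SINT_TO_FP + FPOW pair the patch builds. Plain C++ showing the math, not the DAG code:

    #include <cmath>
    #include <cstdio>

    // powi fallback: convert the integer exponent, then call pow.
    double powi_via_pow(double X, int N) {
      return std::pow(X, static_cast<double>(N)); // SINT_TO_FP + FPOW
    }

    int main() {
      std::printf("%g\n", powi_via_pow(2.0, 10)); // prints 1024
    }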
case ISD::FPOW:
case ISD::STRICT_FPOW:
if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite))
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32,
- RTLIB::POW_FINITE_F64,
- RTLIB::POW_FINITE_F80,
- RTLIB::POW_FINITE_F128,
- RTLIB::POW_FINITE_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32,
+ RTLIB::POW_FINITE_F64,
+ RTLIB::POW_FINITE_F80,
+ RTLIB::POW_FINITE_F128,
+ RTLIB::POW_FINITE_PPCF128, Results);
else
- Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_F128,
- RTLIB::POW_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128, Results);
break;
case ISD::LROUND:
case ISD::STRICT_LROUND:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
- RTLIB::LROUND_F64, RTLIB::LROUND_F80,
- RTLIB::LROUND_F128,
- RTLIB::LROUND_PPCF128));
+ ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64, RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128, Results);
break;
case ISD::LLROUND:
case ISD::STRICT_LLROUND:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
- RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
- RTLIB::LLROUND_F128,
- RTLIB::LLROUND_PPCF128));
+ ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128, Results);
break;
case ISD::LRINT:
case ISD::STRICT_LRINT:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
- RTLIB::LRINT_F64, RTLIB::LRINT_F80,
- RTLIB::LRINT_F128,
- RTLIB::LRINT_PPCF128));
+ ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64, RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128, Results);
break;
case ISD::LLRINT:
case ISD::STRICT_LLRINT:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
- RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
- RTLIB::LLRINT_F128,
- RTLIB::LLRINT_PPCF128));
+ ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128, Results);
break;
case ISD::FDIV:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
- RTLIB::DIV_F80, RTLIB::DIV_F128,
- RTLIB::DIV_PPCF128));
+ case ISD::STRICT_FDIV:
+ ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
+ RTLIB::DIV_F80, RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128, Results);
break;
case ISD::FREM:
case ISD::STRICT_FREM:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
- RTLIB::REM_F80, RTLIB::REM_F128,
- RTLIB::REM_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128, Results);
break;
case ISD::FMA:
case ISD::STRICT_FMA:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
- RTLIB::FMA_F80, RTLIB::FMA_F128,
- RTLIB::FMA_PPCF128));
+ ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128, Results);
break;
case ISD::FADD:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
- RTLIB::ADD_F80, RTLIB::ADD_F128,
- RTLIB::ADD_PPCF128));
+ case ISD::STRICT_FADD:
+ ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128, Results);
break;
case ISD::FMUL:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
- RTLIB::MUL_F80, RTLIB::MUL_F128,
- RTLIB::MUL_PPCF128));
+ case ISD::STRICT_FMUL:
+ ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
+ RTLIB::MUL_F80, RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128, Results);
break;
case ISD::FP16_TO_FP:
if (Node->getValueType(0) == MVT::f32) {
@@ -4044,9 +4189,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
}
case ISD::FSUB:
- Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
- RTLIB::SUB_F80, RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128));
+ case ISD::STRICT_FSUB:
+ ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
+ RTLIB::SUB_F80, RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128, Results);
break;
case ISD::SREM:
Results.push_back(ExpandIntLibCall(Node, true,
@@ -4129,6 +4275,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
OVT = Node->getOperand(0).getSimpleValueType();
}
+ if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_SINT_TO_FP)
+ OVT = Node->getOperand(1).getSimpleValueType();
if (Node->getOpcode() == ISD::BR_CC)
OVT = Node->getOperand(2).getSimpleValueType();
MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
@@ -4177,16 +4326,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
}
case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
- Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0),
- Node->getOpcode() == ISD::FP_TO_SINT, dl);
- Results.push_back(Tmp1);
+ case ISD::STRICT_FP_TO_SINT:
+ PromoteLegalFP_TO_INT(Node, dl, Results);
break;
case ISD::UINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
- Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
- Node->getOpcode() == ISD::SINT_TO_FP, dl);
- Results.push_back(Tmp1);
+ case ISD::STRICT_SINT_TO_FP:
+ PromoteLegalINT_TO_FP(Node, dl, Results);
break;
case ISD::VAARG: {
SDValue Chain = Node->getOperand(0); // Get the chain.
@@ -4358,6 +4507,21 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp3, DAG.getIntPtrConstant(0, dl)));
break;
+ case ISD::STRICT_FREM:
+ case ISD::STRICT_FPOW:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(2)});
+ Tmp3 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1),
+ Tmp2.getValue(1));
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp3, Tmp1, Tmp2});
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp1.getValue(1), Tmp1, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp1);
+ Results.push_back(Tmp1.getValue(1));
+ break;
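Aside: the chain plumbing above in picture form (illustrative; think OVT = f16 promoted to NVT = f32):

    //   InChain --> STRICT_FP_EXTEND(LHS) --+
    //                                       +--> TokenFactor --> STRICT_FREM
    //   InChain --> STRICT_FP_EXTEND(RHS) --+                       |
    //                                                               v
    //                         {value, chain} <-- STRICT_FP_ROUND <--+
    //
    // The TokenFactor merges the two extend chains so neither extension can
    // be reordered past the strict operation that consumes both.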
case ISD::FMA:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
@@ -4404,6 +4568,22 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp2, DAG.getIntPtrConstant(0, dl)));
break;
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FSIN:
+ case ISD::STRICT_FCOS:
+ case ISD::STRICT_FLOG:
+ case ISD::STRICT_FLOG10:
+ case ISD::STRICT_FEXP:
+ Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other},
+ {Tmp1.getValue(1), Tmp1});
+ Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other},
+ {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)});
+ Results.push_back(Tmp3);
+ Results.push_back(Tmp3.getValue(1));
+ break;
case ISD::BUILD_VECTOR: {
MVT EltVT = OVT.getVectorElementType();
MVT NewEltVT = NVT.getVectorElementType();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 72d052473f11..f191160dee4f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -65,39 +65,68 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_VECTOR_ELT:
R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
+ case ISD::STRICT_FMINNUM:
case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
+ case ISD::STRICT_FMAXNUM:
case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
+ case ISD::STRICT_FADD:
case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
+ case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break;
+ case ISD::STRICT_FCEIL:
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
+ case ISD::STRICT_FCOS:
case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
+ case ISD::STRICT_FDIV:
case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
+ case ISD::STRICT_FEXP:
case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
+ case ISD::STRICT_FEXP2:
case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break;
+ case ISD::STRICT_FFLOOR:
case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break;
+ case ISD::STRICT_FLOG:
case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
+ case ISD::STRICT_FLOG2:
case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
+ case ISD::STRICT_FLOG10:
case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::STRICT_FMA:
case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
+ case ISD::STRICT_FMUL:
case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
+ case ISD::STRICT_FNEARBYINT:
case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
+ case ISD::STRICT_FPOW:
case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
+ case ISD::STRICT_FPOWI:
case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::STRICT_FREM:
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
+ case ISD::STRICT_FRINT:
case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break;
+ case ISD::STRICT_FROUND:
case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break;
+ case ISD::STRICT_FSIN:
case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break;
+ case ISD::STRICT_FSQRT:
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
+ case ISD::STRICT_FSUB:
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
@@ -111,6 +140,46 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
}
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC) {
+ bool IsStrict = N->isStrictFPOpcode();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Offset = IsStrict ? 1 : 0;
+ assert(N->getNumOperands() == (1 + Offset) &&
+ "Unexpected number of operands!");
+ SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset));
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpVT = N->getOperand(0 + Offset).getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) {
+ bool IsStrict = N->isStrictFPOpcode();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned Offset = IsStrict ? 1 : 0;
+ assert(N->getNumOperands() == (2 + Offset) &&
+ "Unexpected number of operands!");
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
+ GetSoftenedFloat(N->getOperand(1 + Offset)) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(),
+ N->getOperand(1 + Offset).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
+}
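Aside: the convention both helpers above lean on, stated explicitly:

    // Operand layout behind the Offset trick:
    //   FADD        : Op0 = LHS,   Op1 = RHS               (Offset = 0)
    //   STRICT_FADD : Op0 = Chain, Op1 = LHS, Op2 = RHS    (Offset = 1)
    // Result layout for strict nodes: Res0 = value, Res1 = outgoing chain,
    // which is why ReplaceValueWith(SDValue(N, 1), Tmp.second) rewires the
    // chain users before the softened value is returned.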
+
SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
@@ -174,69 +243,48 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::FMIN_F32,
- RTLIB::FMIN_F64,
- RTLIB::FMIN_F80,
- RTLIB::FMIN_F128,
- RTLIB::FMIN_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32,
+ RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80,
+ RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::FMAX_F32,
- RTLIB::FMAX_F64,
- RTLIB::FMAX_F80,
- RTLIB::FMAX_F128,
- RTLIB::FMAX_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32,
+ RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80,
+ RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::ADD_F32,
- RTLIB::ADD_F64,
- RTLIB::ADD_F80,
- RTLIB::ADD_F128,
- RTLIB::ADD_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) {
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::CBRT_F32,
+ RTLIB::CBRT_F64,
+ RTLIB::CBRT_F80,
+ RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::CEIL_F32,
- RTLIB::CEIL_F64,
- RTLIB::CEIL_F80,
- RTLIB::CEIL_F128,
- RTLIB::CEIL_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32,
+ RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80,
+ RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
@@ -288,231 +336,170 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::COS_F32,
- RTLIB::COS_F64,
- RTLIB::COS_F80,
- RTLIB::COS_F128,
- RTLIB::COS_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32,
+ RTLIB::COS_F64,
+ RTLIB::COS_F80,
+ RTLIB::COS_F128,
+ RTLIB::COS_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::DIV_F32,
- RTLIB::DIV_F64,
- RTLIB::DIV_F80,
- RTLIB::DIV_F128,
- RTLIB::DIV_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::EXP_F32,
- RTLIB::EXP_F64,
- RTLIB::EXP_F80,
- RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32,
+ RTLIB::EXP_F64,
+ RTLIB::EXP_F80,
+ RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::EXP2_F32,
- RTLIB::EXP2_F64,
- RTLIB::EXP2_F80,
- RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32,
+ RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80,
+ RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::FLOOR_F32,
- RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80,
- RTLIB::FLOOR_F128,
- RTLIB::FLOOR_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32,
+ RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80,
+ RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::LOG_F32,
- RTLIB::LOG_F64,
- RTLIB::LOG_F80,
- RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32,
+ RTLIB::LOG_F64,
+ RTLIB::LOG_F80,
+ RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::LOG2_F32,
- RTLIB::LOG2_F64,
- RTLIB::LOG2_F80,
- RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32,
+ RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80,
+ RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::LOG10_F32,
- RTLIB::LOG10_F64,
- RTLIB::LOG10_F80,
- RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32,
+ RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80,
+ RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)),
- GetSoftenedFloat(N->getOperand(2)) };
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
+ GetSoftenedFloat(N->getOperand(1 + Offset)),
+ GetSoftenedFloat(N->getOperand(2 + Offset)) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[3] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType(),
- N->getOperand(2).getValueType() };
+ EVT OpsVT[3] = { N->getOperand(0 + Offset).getValueType(),
+ N->getOperand(1 + Offset).getValueType(),
+ N->getOperand(2 + Offset).getValueType() };
CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::FMA_F32,
- RTLIB::FMA_F64,
- RTLIB::FMA_F80,
- RTLIB::FMA_F128,
- RTLIB::FMA_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG,
+ GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_F128,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, CallOptions, SDLoc(N), Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::MUL_F32,
- RTLIB::MUL_F64,
- RTLIB::MUL_F80,
- RTLIB::MUL_F128,
- RTLIB::MUL_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::MUL_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::NEARBYINT_F32,
- RTLIB::NEARBYINT_F64,
- RTLIB::NEARBYINT_F80,
- RTLIB::NEARBYINT_F128,
- RTLIB::NEARBYINT_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
- EVT FloatVT = N->getValueType(0);
- if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) {
- // Expand Y = FNEG(X) -> Y = X ^ sign mask
- APInt SignMask = APInt::getSignMask(NVT.getSizeInBits());
- return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)),
- DAG.getConstant(SignMask, dl, NVT));
- }
-
- // Expand Y = FNEG(X) -> Y = SUB -0.0, X
- SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)),
- GetSoftenedFloat(N->getOperand(0)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128),
- NVT, Ops, CallOptions, dl).first;
+ // Expand Y = FNEG(X) -> Y = X ^ sign mask
+ APInt SignMask = APInt::getSignMask(NVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)),
+ DAG.getConstant(SignMask, dl, NVT));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = N->getOperand(0);
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
- // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
- // entirely possible for both f16 and f32 to be legal, so use the fully
- // hard-float FP_EXTEND rather than FP16_TO_FP.
- if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
- Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
- if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat)
- AddToWorklist(Op.getNode());
- }
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
Op = GetPromotedFloat(Op);
// If the promotion did the FP_EXTEND to the destination type for us,
// there's nothing left to do here.
- if (Op.getValueType() == N->getValueType(0)) {
+ if (Op.getValueType() == N->getValueType(0))
return BitConvertToInteger(Op);
+ }
+
+ // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
+ // entirely possible for both f16 and f32 to be legal, so use the fully
+ // hard-float FP_EXTEND rather than FP16_TO_FP.
+ if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
+ if (IsStrict) {
+ Op = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N),
+ { MVT::f32, MVT::Other }, { Chain, Op });
+ Chain = Op.getValue(1);
+ } else {
+ Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
}
}
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
+ EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
}
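Aside: why the two-stage dance above exists. From f16 the only extension libcall goes to f32, so wider destinations widen in two hops (libcall names per compiler-rt; the strict variant threads the chain through the intermediate extend):

    //   f16 -> f32  : direct libcall (__extendhfsf2 / __gnu_h2f_ieee)
    //   f16 -> f64  : f16 -(FP_EXTEND)-> f32, then __extendsfdf2 -> f64
    //   f16 -> f128 : f16 -(FP_EXTEND)-> f32, then __extendsftf2 -> f128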
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -535,167 +522,127 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
+ bool IsStrict = N->isStrictFPOpcode();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = N->getOperand(0);
- if (N->getValueType(0) == MVT::f16) {
- // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a
- // storage-only type get a chance to select things.
- return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op);
- }
-
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
+ EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::POW_F32,
- RTLIB::POW_F64,
- RTLIB::POW_F80,
- RTLIB::POW_F128,
- RTLIB::POW_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32,
+ RTLIB::POW_F64,
+ RTLIB::POW_F80,
+ RTLIB::POW_F128,
+ RTLIB::POW_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
- assert(N->getOperand(1).getValueType() == MVT::i32 &&
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ assert(N->getOperand(1 + Offset).getValueType() == MVT::i32 &&
"Unsupported power type!");
+ RTLIB::Libcall LC = GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128);
+ if (!TLI.getLibcallName(LC)) {
+ // Some targets don't have a powi libcall; use pow instead.
+ // FIXME: Implement this if some target needs it.
+ DAG.getContext()->emitError("Don't know how to soften fpowi to fpow");
+ return DAG.getUNDEF(N->getValueType(0));
+ }
+
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)),
+ N->getOperand(1 + Offset) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
+ EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(),
+ N->getOperand(1 + Offset).getValueType() };
CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::POWI_F32,
- RTLIB::POWI_F64,
- RTLIB::POWI_F80,
- RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::REM_F32,
- RTLIB::REM_F64,
- RTLIB::REM_F80,
- RTLIB::REM_F128,
- RTLIB::REM_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32,
+ RTLIB::REM_F64,
+ RTLIB::REM_F80,
+ RTLIB::REM_F128,
+ RTLIB::REM_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::RINT_F32,
- RTLIB::RINT_F64,
- RTLIB::RINT_F80,
- RTLIB::RINT_F128,
- RTLIB::RINT_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32,
+ RTLIB::RINT_F64,
+ RTLIB::RINT_F80,
+ RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::ROUND_F32,
- RTLIB::ROUND_F64,
- RTLIB::ROUND_F80,
- RTLIB::ROUND_F128,
- RTLIB::ROUND_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::SIN_F32,
- RTLIB::SIN_F64,
- RTLIB::SIN_F80,
- RTLIB::SIN_F128,
- RTLIB::SIN_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32,
+ RTLIB::SIN_F64,
+ RTLIB::SIN_F80,
+ RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::SQRT_F32,
- RTLIB::SQRT_F64,
- RTLIB::SQRT_F80,
- RTLIB::SQRT_F128,
- RTLIB::SQRT_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32,
+ RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80,
+ RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
- GetSoftenedFloat(N->getOperand(1)) };
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[2] = { N->getOperand(0).getValueType(),
- N->getOperand(1).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128),
- NVT, Ops, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- if (N->getValueType(0) == MVT::f16)
- return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
-
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::TRUNC_F32,
- RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80,
- RTLIB::TRUNC_F128,
- RTLIB::TRUNC_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32,
+ RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80,
+ RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128));
}
SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
@@ -715,8 +662,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getAAInfo());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
- if (N != NewL.getValue(1).getNode())
- ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
return NewL;
}
@@ -771,8 +717,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
- bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
- EVT SVT = N->getOperand(0).getValueType();
+ bool IsStrict = N->isStrictFPOpcode();
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP ||
+ N->getOpcode() == ISD::STRICT_SINT_TO_FP;
+ EVT SVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
EVT RVT = N->getValueType(0);
EVT NVT = EVT();
SDLoc dl(N);
@@ -790,16 +738,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
// Sign/zero extend the argument if the libcall takes a larger type.
SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
- NVT, N->getOperand(0));
+ NVT, N->getOperand(IsStrict ? 1 : 0));
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(Signed);
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, LC,
- TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- Op, CallOptions, dl).first;
+ CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
+ Op, CallOptions, dl, Chain);
+
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ return Tmp.first;
}
@@ -822,18 +774,27 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
- case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
+ case ISD::STRICT_LROUND:
case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break;
+ case ISD::STRICT_LLROUND:
case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break;
+ case ISD::STRICT_LRINT:
case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break;
+ case ISD::STRICT_LLRINT:
case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
+ case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -845,7 +806,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
- "Invalid operand promotion");
+ "Invalid operand softening");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
@@ -857,42 +818,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
- // If we get here, the result must be legal but the source illegal.
- EVT SVT = N->getOperand(0).getValueType();
- EVT RVT = N->getValueType(0);
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
-
- if (SVT == MVT::f16)
- return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op);
-
- RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
-
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
-}
-
-
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
// We actually deal with the partially-softened FP_TO_FP16 node too, which
// returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
- assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16);
+ assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_ROUND);
- EVT SVT = N->getOperand(0).getValueType();
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
EVT RVT = N->getValueType(0);
EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ Op = GetSoftenedFloat(Op);
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
+ CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict) {
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
+ }
+ return Tmp.first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
@@ -920,8 +873,12 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
- bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
- EVT SVT = N->getOperand(0).getValueType();
+ bool IsStrict = N->isStrictFPOpcode();
+ bool Signed = N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_SINT;
+
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ EVT SVT = Op.getValueType();
EVT RVT = N->getValueType(0);
EVT NVT = EVT();
SDLoc dl(N);
@@ -937,18 +894,26 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
NVT = (MVT::SimpleValueType)IntVT;
      // The type needs to be big enough to hold the result.
if (NVT.bitsGE(RVT))
- LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT);
+ LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT);
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ Op = GetSoftenedFloat(Op);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first;
+ CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, dl, Chain);
// Truncate the result if the libcall returns a larger type.
- return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
+ SDValue Res = DAG.getNode(ISD::TRUNCATE, dl, RVT, Tmp.first);
+
+ if (!IsStrict)
+ return Res;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
}
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
@@ -976,26 +941,39 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
- SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
- ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Op1 = N->getOperand(IsStrict ? 2 : 1);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ ISD::CondCode CCCode =
+ cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
+
+ EVT VT = Op0.getValueType();
+ SDValue NewLHS = GetSoftenedFloat(Op0);
+ SDValue NewRHS = GetSoftenedFloat(Op1);
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), Op0, Op1,
+ Chain, N->getOpcode() == ISD::STRICT_FSETCCS);
- EVT VT = NewLHS.getValueType();
- NewLHS = GetSoftenedFloat(NewLHS);
- NewRHS = GetSoftenedFloat(NewRHS);
- TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
- N->getOperand(0), N->getOperand(1));
-
- // If softenSetCCOperands returned a scalar, use it.
- if (!NewRHS.getNode()) {
- assert(NewLHS.getValueType() == N->getValueType(0) &&
- "Unexpected setcc expansion!");
- return NewLHS;
+ // Update N to have the operands specified.
+ if (NewRHS.getNode()) {
+ if (IsStrict)
+ NewLHS = DAG.getNode(ISD::SETCC, SDLoc(N), N->getValueType(0), NewLHS,
+ NewRHS, DAG.getCondCode(CCCode));
+ else
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
}
- // Otherwise, update N to have the operands specified.
- return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
- DAG.getCondCode(CCCode)),
- 0);
+ // Otherwise, softenSetCCOperands returned a scalar, use it.
+ assert((NewRHS.getNode() || NewLHS.getValueType() == N->getValueType(0)) &&
+ "Unexpected setcc expansion!");
+
+ if (IsStrict) {
+ ReplaceValueWith(SDValue(N, 0), NewLHS);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+ return SDValue();
+ }
+ return NewLHS;
}
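Aside: the control flow above hinges on a convention of softenSetCCOperands that is easy to miss; spelled out here as an assumption:

    // Assumed contract of TLI.softenSetCCOperands:
    //  - If the compare can stay a SETCC on the softened integers, it
    //    rewrites NewLHS/NewRHS (and possibly CCCode) and leaves NewRHS
    //    non-null; the caller re-emits or updates the SETCC.
    //  - If it had to materialize the whole predicate through a libcall
    //    (e.g. __unordsf2 for unordered compares), it returns that scalar
    //    in NewLHS and nulls out NewRHS, so the caller just uses NewLHS.
    //  - For strict nodes it also advances Chain past the comparison call.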
SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
@@ -1016,72 +994,99 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
ST->getMemOperand());
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = BitConvertToInteger(N->getOperand(1));
+ SDLoc dl(N);
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
- RTLIB::LROUND_F32,
- RTLIB::LROUND_F64,
- RTLIB::LROUND_F80,
- RTLIB::LROUND_F128,
- RTLIB::LROUND_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ EVT LVT = LHS.getValueType();
+ EVT ILVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits());
+ EVT RVT = RHS.getValueType();
+
+ unsigned LSize = LVT.getSizeInBits();
+ unsigned RSize = RVT.getSizeInBits();
+
+  // Shift right, or any-extend and shift left, if the two operands have
+  // different widths, so the sign bit lines up.
+ int SizeDiff = RSize - LSize;
+ if (SizeDiff > 0) {
+ RHS =
+ DAG.getNode(ISD::SRL, dl, RVT, RHS,
+ DAG.getConstant(SizeDiff, dl,
+ TLI.getShiftAmountTy(RHS.getValueType(),
+ DAG.getDataLayout())));
+ RHS = DAG.getNode(ISD::TRUNCATE, dl, ILVT, RHS);
+ } else if (SizeDiff < 0) {
+ RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, RHS);
+ RHS =
+ DAG.getNode(ISD::SHL, dl, ILVT, RHS,
+ DAG.getConstant(-SizeDiff, dl,
+ TLI.getShiftAmountTy(RHS.getValueType(),
+ DAG.getDataLayout())));
+ }
+
+ RHS = DAG.getBitcast(LVT, RHS);
+ return DAG.getNode(ISD::FCOPYSIGN, dl, LVT, LHS, RHS);
}
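
As a plain-C++ illustration of the sign-bit plumbing above, here is copysign(f32, f64) carried out with the same bitcast, shift-right, truncate, and splice steps; a sketch only, with a made-up function name:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Sketch of the SizeDiff > 0 path: RSize = 64, LSize = 32, so the f64
    // sign bit (bit 63) is shifted down to bit 31, truncated to i32, and
    // spliced into the f32 magnitude.
    static float copySignF32FromF64(float Mag, double Sign) {
      uint64_t R;
      std::memcpy(&R, &Sign, sizeof(R));  // BitConvertToInteger(RHS)
      R >>= 32;                           // ISD::SRL by SizeDiff
      uint32_t RBits = (uint32_t)R;       // ISD::TRUNCATE to ILVT (i32)
      uint32_t L;
      std::memcpy(&L, &Mag, sizeof(L));
      L = (L & 0x7fffffffu) | (RBits & 0x80000000u);
      std::memcpy(&Mag, &L, sizeof(L));
      return Mag;
    }

    int main() {
      std::printf("%f\n", copySignF32FromF64(1.5f, -0.0)); // prints -1.500000
    }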
-SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset));
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
- RTLIB::LLROUND_F32,
- RTLIB::LLROUND_F64,
- RTLIB::LLROUND_F80,
- RTLIB::LLROUND_F128,
- RTLIB::LLROUND_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+ EVT OpVT = N->getOperand(0 + Offset).getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
+ CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict) {
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
+ }
+
+ return Tmp.first;
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
+ EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+ return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+ RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64,
+ RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128));
+}
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
- RTLIB::LRINT_F32,
- RTLIB::LRINT_F64,
- RTLIB::LRINT_F80,
- RTLIB::LRINT_F128,
- RTLIB::LRINT_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
+ EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+ return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+ RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64,
+ RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128));
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
+ EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+ return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+ RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64,
+ RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128));
+}
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
- TargetLowering::MakeLibCallOptions CallOptions;
- EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
- return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
- RTLIB::LLRINT_F32,
- RTLIB::LLRINT_F64,
- RTLIB::LLRINT_F80,
- RTLIB::LLRINT_F128,
- RTLIB::LLRINT_PPCF128),
- NVT, Op, CallOptions, SDLoc(N)).first;
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
+ EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType();
+ return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT,
+ RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64,
+ RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128));
}
//===----------------------------------------------------------------------===//
@@ -1122,35 +1127,61 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
+ case ISD::STRICT_FMINNUM:
case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break;
+ case ISD::STRICT_FMAXNUM:
case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break;
+ case ISD::STRICT_FADD:
case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break;
+ case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break;
+ case ISD::STRICT_FCEIL:
case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::STRICT_FCOS:
case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break;
+ case ISD::STRICT_FDIV:
case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break;
+ case ISD::STRICT_FEXP:
case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break;
+ case ISD::STRICT_FEXP2:
case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+ case ISD::STRICT_FFLOOR:
case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+ case ISD::STRICT_FLOG:
case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
+ case ISD::STRICT_FLOG2:
case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+ case ISD::STRICT_FLOG10:
case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::STRICT_FMA:
case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break;
+ case ISD::STRICT_FMUL:
case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
+ case ISD::STRICT_FNEARBYINT:
case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+ case ISD::STRICT_FPOW:
case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
+ case ISD::STRICT_FPOWI:
case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::STRICT_FRINT:
case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
+ case ISD::STRICT_FROUND:
case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break;
+ case ISD::STRICT_FSIN:
case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break;
+ case ISD::STRICT_FSQRT:
case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+ case ISD::STRICT_FSUB:
case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+ case ISD::STRICT_FREM:
case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
}
@@ -1174,6 +1205,36 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
dl, NVT);
}
+void DAGTypeLegalizer::ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC,
+ SDValue &Lo, SDValue &Hi) {
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Op = N->getOperand(0 + Offset);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0),
+ Op, CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ GetPairElements(Tmp.first, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
+ SDValue &Lo, SDValue &Hi) {
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Ops[] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0),
+ Ops, CallOptions, SDLoc(N),
+ Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ GetPairElements(Tmp.first, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
SDValue &Hi) {
assert(N->getValueType(0) == MVT::ppcf128 &&
@@ -1190,181 +1251,159 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::FMIN_F32, RTLIB::FMIN_F64,
- RTLIB::FMIN_F80, RTLIB::FMIN_F128,
- RTLIB::FMIN_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMIN_F32, RTLIB::FMIN_F64,
+ RTLIB::FMIN_F80, RTLIB::FMIN_F128,
+ RTLIB::FMIN_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::FMAX_F32, RTLIB::FMAX_F64,
- RTLIB::FMAX_F80, RTLIB::FMAX_F128,
- RTLIB::FMAX_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FMAX_F32, RTLIB::FMAX_F64,
+ RTLIB::FMAX_F80, RTLIB::FMAX_F128,
+ RTLIB::FMAX_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::ADD_F32, RTLIB::ADD_F64,
- RTLIB::ADD_F80, RTLIB::ADD_F128,
- RTLIB::ADD_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ADD_F32, RTLIB::ADD_F64,
+ RTLIB::ADD_F80, RTLIB::ADD_F128,
+ RTLIB::ADD_PPCF128), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32,
+ RTLIB::CBRT_F64, RTLIB::CBRT_F80,
+ RTLIB::CBRT_F128,
+ RTLIB::CBRT_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::CEIL_F32, RTLIB::CEIL_F64,
- RTLIB::CEIL_F80, RTLIB::CEIL_F128,
- RTLIB::CEIL_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::CEIL_F32, RTLIB::CEIL_F64,
+ RTLIB::CEIL_F80, RTLIB::CEIL_F128,
+ RTLIB::CEIL_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::COPYSIGN_F32,
- RTLIB::COPYSIGN_F64,
- RTLIB::COPYSIGN_F80,
- RTLIB::COPYSIGN_F128,
- RTLIB::COPYSIGN_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::COPYSIGN_F32,
+ RTLIB::COPYSIGN_F64,
+ RTLIB::COPYSIGN_F80,
+ RTLIB::COPYSIGN_F128,
+ RTLIB::COPYSIGN_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::COS_F32, RTLIB::COS_F64,
- RTLIB::COS_F80, RTLIB::COS_F128,
- RTLIB::COS_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::COS_F32, RTLIB::COS_F64,
+ RTLIB::COS_F80, RTLIB::COS_F128,
+ RTLIB::COS_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- TargetLowering::MakeLibCallOptions CallOptions;
- SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::DIV_F32,
- RTLIB::DIV_F64,
- RTLIB::DIV_F80,
- RTLIB::DIV_F128,
- RTLIB::DIV_PPCF128),
- N->getValueType(0), Ops, CallOptions,
- SDLoc(N)).first;
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::DIV_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::EXP_F32, RTLIB::EXP_F64,
- RTLIB::EXP_F80, RTLIB::EXP_F128,
- RTLIB::EXP_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP_F32, RTLIB::EXP_F64,
+ RTLIB::EXP_F80, RTLIB::EXP_F128,
+ RTLIB::EXP_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::EXP2_F32, RTLIB::EXP2_F64,
- RTLIB::EXP2_F80, RTLIB::EXP2_F128,
- RTLIB::EXP2_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::EXP2_F32, RTLIB::EXP2_F64,
+ RTLIB::EXP2_F80, RTLIB::EXP2_F128,
+ RTLIB::EXP2_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
- RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
- RTLIB::FLOOR_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
+ RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
+ RTLIB::FLOOR_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG_F32, RTLIB::LOG_F64,
- RTLIB::LOG_F80, RTLIB::LOG_F128,
- RTLIB::LOG_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG_F32, RTLIB::LOG_F64,
+ RTLIB::LOG_F80, RTLIB::LOG_F128,
+ RTLIB::LOG_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG2_F32, RTLIB::LOG2_F64,
- RTLIB::LOG2_F80, RTLIB::LOG2_F128,
- RTLIB::LOG2_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG2_F32, RTLIB::LOG2_F64,
+ RTLIB::LOG2_F80, RTLIB::LOG2_F128,
+ RTLIB::LOG2_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::LOG10_F32, RTLIB::LOG10_F64,
- RTLIB::LOG10_F80, RTLIB::LOG10_F128,
- RTLIB::LOG10_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::LOG10_F32, RTLIB::LOG10_F64,
+ RTLIB::LOG10_F80, RTLIB::LOG10_F128,
+ RTLIB::LOG10_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ bool IsStrict = N->isStrictFPOpcode();
+ unsigned Offset = IsStrict ? 1 : 0;
+ SDValue Ops[3] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset),
+ N->getOperand(2 + Offset) };
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
- SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMA_F32,
RTLIB::FMA_F64,
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
N->getValueType(0), Ops, CallOptions,
- SDLoc(N)).first;
- GetPairElements(Call, Lo, Hi);
+ SDLoc(N), Chain);
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ GetPairElements(Tmp.first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- TargetLowering::MakeLibCallOptions CallOptions;
- SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::MUL_F32,
RTLIB::MUL_F64,
RTLIB::MUL_F80,
RTLIB::MUL_F128,
- RTLIB::MUL_PPCF128),
- N->getValueType(0), Ops, CallOptions,
- SDLoc(N)).first;
- GetPairElements(Call, Lo, Hi);
+ RTLIB::MUL_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::NEARBYINT_F32,
- RTLIB::NEARBYINT_F64,
- RTLIB::NEARBYINT_F80,
- RTLIB::NEARBYINT_F128,
- RTLIB::NEARBYINT_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::NEARBYINT_F32,
+ RTLIB::NEARBYINT_F64,
+ RTLIB::NEARBYINT_F80,
+ RTLIB::NEARBYINT_F128,
+ RTLIB::NEARBYINT_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
@@ -1379,106 +1418,105 @@ void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
- Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0));
+ bool IsStrict = N->isStrictFPOpcode();
+
+ SDValue Chain;
+ if (IsStrict) {
+ // If the expanded type is the same as the input type, just bypass the node.
+ if (NVT == N->getOperand(1).getValueType()) {
+ Hi = N->getOperand(1);
+ Chain = N->getOperand(0);
+ } else {
+ // Otherwise we need to extend.
+ Hi = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { NVT, MVT::Other },
+ { N->getOperand(0), N->getOperand(1) });
+ Chain = Hi.getValue(1);
+ }
+ } else {
+ Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0));
+ }
+
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
APInt(NVT.getSizeInBits(), 0)), dl, NVT);
+
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Chain);
}
void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::POW_F32, RTLIB::POW_F64,
- RTLIB::POW_F80, RTLIB::POW_F128,
- RTLIB::POW_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::POW_F32, RTLIB::POW_F64,
+ RTLIB::POW_F80, RTLIB::POW_F128,
+ RTLIB::POW_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::POWI_F32, RTLIB::POWI_F64,
- RTLIB::POWI_F80, RTLIB::POWI_F128,
- RTLIB::POWI_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::POWI_F32, RTLIB::POWI_F64,
+ RTLIB::POWI_F80, RTLIB::POWI_F128,
+ RTLIB::POWI_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::REM_F32, RTLIB::REM_F64,
- RTLIB::REM_F80, RTLIB::REM_F128,
- RTLIB::REM_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::REM_F32, RTLIB::REM_F64,
+ RTLIB::REM_F80, RTLIB::REM_F128,
+ RTLIB::REM_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::RINT_F32, RTLIB::RINT_F64,
- RTLIB::RINT_F80, RTLIB::RINT_F128,
- RTLIB::RINT_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::RINT_F32, RTLIB::RINT_F64,
+ RTLIB::RINT_F80, RTLIB::RINT_F128,
+ RTLIB::RINT_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::ROUND_F32,
- RTLIB::ROUND_F64,
- RTLIB::ROUND_F80,
- RTLIB::ROUND_F128,
- RTLIB::ROUND_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::ROUND_F32,
+ RTLIB::ROUND_F64,
+ RTLIB::ROUND_F80,
+ RTLIB::ROUND_F128,
+ RTLIB::ROUND_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::SIN_F32, RTLIB::SIN_F64,
- RTLIB::SIN_F80, RTLIB::SIN_F128,
- RTLIB::SIN_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SIN_F32, RTLIB::SIN_F64,
+ RTLIB::SIN_F80, RTLIB::SIN_F128,
+ RTLIB::SIN_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::SQRT_F32, RTLIB::SQRT_F64,
- RTLIB::SQRT_F80, RTLIB::SQRT_F128,
- RTLIB::SQRT_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+ RTLIB::SQRT_F80, RTLIB::SQRT_F128,
+ RTLIB::SQRT_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- TargetLowering::MakeLibCallOptions CallOptions;
- SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
- RTLIB::SUB_F32,
- RTLIB::SUB_F64,
- RTLIB::SUB_F80,
- RTLIB::SUB_F128,
- RTLIB::SUB_PPCF128),
- N->getValueType(0), Ops, CallOptions,
- SDLoc(N)).first;
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::SUB_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
- RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
- RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
- RTLIB::TRUNC_PPCF128),
- N, false);
- GetPairElements(Call, Lo, Hi);
+ ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+ RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+ RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
+ RTLIB::TRUNC_PPCF128), Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
@@ -1619,8 +1657,11 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break;
case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break;
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
+ case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break;
case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break;
@@ -1709,34 +1750,72 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
- assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+ bool IsStrict = N->isStrictFPOpcode();
+ assert(N->getOperand(IsStrict ? 1 : 0).getValueType() == MVT::ppcf128 &&
"Logic only correct for ppcf128!");
SDValue Lo, Hi;
- GetExpandedFloat(N->getOperand(0), Lo, Hi);
- // Round it the rest of the way (e.g. to f32) if needed.
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
- N->getValueType(0), Hi, N->getOperand(1));
+ GetExpandedFloat(N->getOperand(IsStrict ? 1 : 0), Lo, Hi);
+
+ if (!IsStrict)
+ // Round it the rest of the way (e.g. to f32) if needed.
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
+ N->getValueType(0), Hi, N->getOperand(1));
+
+ // Eliminate the node if the input float type is the same as the output float
+ // type.
+ if (Hi.getValueType() == N->getValueType(0)) {
+ // Connect the output chain to the input chain, unlinking the node.
+ ReplaceValueWith(SDValue(N, 1), N->getOperand(0));
+ ReplaceValueWith(SDValue(N, 0), Hi);
+ return SDValue();
+ }
+
+ SDValue Expansion = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+ {N->getValueType(0), MVT::Other},
+ {N->getOperand(0), Hi, N->getOperand(2)});
+ ReplaceValueWith(SDValue(N, 1), Expansion.getValue(1));
+ ReplaceValueWith(SDValue(N, 0), Expansion);
+ return SDValue();
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
EVT RVT = N->getValueType(0);
SDLoc dl(N);
- RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
TargetLowering::MakeLibCallOptions CallOptions;
- return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
+ CallOptions, dl, Chain);
+ if (!IsStrict)
+ return Tmp.first;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
EVT RVT = N->getValueType(0);
SDLoc dl(N);
- RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
TargetLowering::MakeLibCallOptions CallOptions;
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0),
- CallOptions, dl).first;
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
+ CallOptions, dl, Chain);
+ if (!IsStrict)
+ return Tmp.first;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
@@ -1800,7 +1879,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ EVT RetVT = N->getOperand(0).getValueType();
TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LROUND_F32,
@@ -1813,7 +1892,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ EVT RetVT = N->getOperand(0).getValueType();
TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLROUND_F32,
@@ -1826,7 +1905,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
EVT RVT = N->getValueType(0);
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ EVT RetVT = N->getOperand(0).getValueType();
TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LRINT_F32,
@@ -1839,7 +1918,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
EVT RVT = N->getValueType(0);
- EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ EVT RetVT = N->getOperand(0).getValueType();
TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLRINT_F32,
@@ -1963,12 +2042,11 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) {
// code.
SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) {
EVT VT = N->getValueType(0);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Op0 = GetPromotedFloat(N->getOperand(0));
SDValue Op1 = GetPromotedFloat(N->getOperand(1));
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
- return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode);
+ return DAG.getSetCC(SDLoc(N), VT, Op0, Op1, CCCode);
}
@@ -2026,6 +2104,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
// Unary FP Operations
case ISD::FABS:
+ case ISD::FCBRT:
case ISD::FCEIL:
case ISD::FCOS:
case ISD::FEXP:
@@ -2304,7 +2383,6 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
EVT VT = N->getValueType(0);
- EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
AtomicSDNode *AM = cast<AtomicSDNode>(N);
SDLoc SL(N);
@@ -2318,13 +2396,19 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
{ AM->getChain(), AM->getBasePtr(), CastVal },
AM->getMemOperand());
- SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
- NewAtomic);
+ SDValue Result = NewAtomic;
+
+ if (getTypeAction(VT) == TargetLowering::TypePromoteFloat) {
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ Result = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
+ NewAtomic);
+ }
+
// Legalize the chain result by replacing uses of the old value chain with the
// new one
ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1));
- return ResultCast;
+ return Result;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index d5c1b539adbd..0e46f8d68f83 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -75,6 +75,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break;
case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break;
case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
@@ -158,6 +160,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX: Res = PromoteIntRes_DIVFIX(N); break;
+
case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
case ISD::ATOMIC_LOAD:
@@ -337,8 +342,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
// The input is widened to the same size. Convert to the widened value.
// Make sure that the outgoing value is not a vector, because this would
// make us bitcast between two vectors which are legalized in different ways.
- if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
- return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) {
+ SDValue Res =
+ DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
+
+ // For big endian targets we need to shift the casted value or the
+ // interesting bits will end up at the wrong place.
+ if (DAG.getDataLayout().isBigEndian()) {
+ unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits();
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NOutVT, DAG.getDataLayout());
+ assert(ShiftAmt < NOutVT.getSizeInBits() && "Too large shift amount!");
+ Res = DAG.getNode(ISD::SRL, dl, NOutVT, Res,
+ DAG.getConstant(ShiftAmt, dl, ShiftAmtTy));
+ }
+ return Res;
+ }
// If the output type is also a vector and widening it to the same size
// as the widened input type would be a legal type, we can widen the bitcast
// and handle the promotion after.
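
The big-endian fixup added above can be modeled standalone; the sketch below assumes a 32-bit payload that was widened to 64 bits before the bitcast (values invented for illustration):

    #include <cstdint>
    #include <cstdio>

    // On a big-endian target the original bits of a widened value occupy the
    // most-significant end after the bitcast, so they must be shifted down by
    // (widened bits - original bits) to land where the promoted integer code
    // expects them.
    int main() {
      uint64_t Widened = 0xDEADBEEF00000000ull; // payload in bits [63:32]
      unsigned ShiftAmt = 64 - 32;              // NInVT size - InVT size
      uint64_t Promoted = Widened >> ShiftAmt;  // the ISD::SRL in the patch
      std::printf("0x%llx\n", (unsigned long long)Promoted); // 0xdeadbeef
    }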
@@ -365,15 +383,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
CreateStackStoreLoad(InOp, OutVT));
}
-// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount
+// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount
// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
-static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT,
- const TargetLowering &TLI,
+static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,
SelectionDAG &DAG) {
EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- // If the value won't fit in the prefered type, just use something safe. It
- // will be legalized when the shift is expanded.
- if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits())
+ // If any possible shift value won't fit in the preferred type, just use
+ // something safe. It will be legalized when the shift is expanded.
+ if (!ShiftVT.isVector() &&
+ ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits()))
ShiftVT = MVT::i32;
return ShiftVT;
}
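
The width rule in this helper can be checked with ordinary arithmetic; the i8 shift-amount type below is hypothetical, chosen only to show when the i32 fallback fires:

    #include <cstdio>

    // A shift amount for an N-bit value ranges up to N-1 and therefore needs
    // ceil(log2(N)) bits. With a hypothetical i8 preferred shift type, i512
    // (9 bits needed) would still fit, but the point generalizes: once the
    // preferred type is narrower than ceil(log2(N)) bits, fall back to i32.
    static unsigned log2Ceil(unsigned V) {
      unsigned Bits = 0;
      while ((1u << Bits) < V)
        ++Bits;
      return Bits;
    }

    int main() {
      std::printf("i128 shift amounts need %u bits\n", log2Ceil(128)); // 7
      std::printf("i512 shift amounts need %u bits\n", log2Ceil(512)); // 9
    }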
@@ -385,7 +403,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
@@ -397,7 +415,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
SDLoc dl(N);
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG);
+ EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
@@ -592,8 +610,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
- N->getMask(), ExtPassThru, N->getMemoryVT(),
- N->getMemOperand(), ISD::EXTLOAD);
+ N->getOffset(), N->getMask(), ExtPassThru,
+ N->getMemoryVT(), N->getMemOperand(),
+ N->getAddressingMode(), ISD::EXTLOAD);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -604,7 +623,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
assert(NVT == ExtPassThru.getValueType() &&
- "Gather result type and the passThru agrument type should be the same");
+ "Gather result type and the passThru argument type should be the same");
SDLoc dl(N);
SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(),
@@ -762,6 +781,71 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
N->getOperand(2));
}
+static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
+ unsigned Scale, const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ EVT VT = LHS.getValueType();
+ bool Signed = N->getOpcode() == ISD::SDIVFIX;
+
+ SDLoc dl(N);
+ // See if we can perform the division in this type without widening.
+ if (SDValue V = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
+ DAG))
+ return V;
+
+ // If that didn't work, double the type width and try again. That must work,
+ // or something is wrong.
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getScalarSizeInBits() * 2);
+ if (Signed) {
+ LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT);
+ RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT);
+ } else {
+ LHS = DAG.getZExtOrTrunc(LHS, dl, WideVT);
+ RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT);
+ }
+
+ // TODO: Saturation.
+
+ SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
+ DAG);
+ assert(Res && "Expanding DIVFIX with wide type failed?");
+ return DAG.getZExtOrTrunc(Res, dl, VT);
+}
+
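
Why the doubled width must succeed can be seen with small numbers: a fixed-point divide with scale S computes (LHS << S) / RHS, and it is the pre-shift that can overflow the source width. A standalone sketch (the Q4.4 values are invented for illustration):

    #include <cstdint>
    #include <cstdio>

    // sdiv.fix(96, 2, 4) in Q4.4: 96 encodes 6.0 and 2 encodes 0.125, so the
    // exact quotient is 48.0, encoded as 768. The pre-shift 96 << 4 already
    // overflows 8 bits, but never overflows the doubled width.
    int main() {
      int8_t LHS = 96, RHS = 2;
      unsigned Scale = 4;
      int16_t Wide = (int16_t)((int16_t)LHS << Scale) / RHS;
      std::printf("%d\n", Wide); // 768; truncating back to 8 bits is the
                                 // saturation TODO noted in the patch
    }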
+SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op1Promoted, Op2Promoted;
+ bool Signed = N->getOpcode() == ISD::SDIVFIX;
+ if (Signed) {
+ Op1Promoted = SExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ } else {
+ Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
+ }
+ EVT PromotedType = Op1Promoted.getValueType();
+ unsigned Scale = N->getConstantOperandVal(2);
+
+ SDValue Res;
+ // If the type is already legal and the operation is legal in that type, we
+ // should not early expand.
+ if (TLI.isTypeLegal(PromotedType)) {
+ TargetLowering::LegalizeAction Action =
+ TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale);
+ if (Action == TargetLowering::Legal || Action == TargetLowering::Custom)
+ Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
+ Op2Promoted, N->getOperand(2));
+ }
+
+ if (!Res)
+ Res = earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG);
+
+ // TODO: Saturation.
+
+ return Res;
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
@@ -816,7 +900,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
- EVT InVT = N->getOperand(0).getValueType();
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ EVT InVT = N->getOperand(OpNo).getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT SVT = getSetCCResultType(InVT);
@@ -835,12 +920,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
}
SDLoc dl(N);
- assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() &&
+ assert(SVT.isVector() == N->getOperand(OpNo).getValueType().isVector() &&
"Vector compare must return a vector result!");
// Get the SETCC result using the canonical SETCC type.
- SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
- N->getOperand(1), N->getOperand(2));
+ SDValue SetCC;
+ if (N->isStrictFPOpcode()) {
+ EVT VTs[] = {SVT, MVT::Other};
+ SDValue Opers[] = {N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3)};
+ SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers);
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1));
+ } else
+ SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
// Convert to the expected type.
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
@@ -1058,8 +1153,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
unsigned Shift = SmallVT.getScalarSizeInBits();
- EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(),
- TLI, DAG);
+ EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG);
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
DAG.getConstant(Shift, DL, ShiftTy));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
@@ -1176,6 +1270,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
+ case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
@@ -1189,6 +1284,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
+ case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
@@ -1209,7 +1305,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
- case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break;
+ case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX: Res = PromoteIntOp_FIX(N); break;
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
@@ -1465,6 +1563,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
SExtPromotedInteger(N->getOperand(0))), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ SExtPromotedInteger(N->getOperand(1))), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
@@ -1486,11 +1589,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
SDLoc dl(N);
bool TruncateStore = false;
- if (OpNo == 3) {
+ if (OpNo == 4) {
Mask = PromoteTargetBoolean(Mask, DataVT);
// Update in place.
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
- NewOps[3] = Mask;
+ NewOps[4] = Mask;
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
} else { // Data operand
assert(OpNo == 1 && "Unexpected operand for promotion");
@@ -1498,14 +1601,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
TruncateStore = true;
}
- return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
- N->getMemoryVT(), N->getMemOperand(),
+ return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(),
+ N->getOffset(), Mask, N->getMemoryVT(),
+ N->getMemOperand(), N->getAddressingMode(),
TruncateStore, N->isCompressingStore());
}
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
unsigned OpNo) {
- assert(OpNo == 2 && "Only know how to promote the mask!");
+ assert(OpNo == 3 && "Only know how to promote the mask!");
EVT DataVT = N->getValueType(0);
SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
@@ -1563,6 +1667,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
ZExtPromotedInteger(N->getOperand(0))), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) {
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1))), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
SDLoc dl(N);
SDValue Op = GetPromotedInteger(N->getOperand(0));
@@ -1584,7 +1693,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_MULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) {
SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
return SDValue(
DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
@@ -1697,10 +1806,14 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
+ case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
- case ISD::LLROUND: ExpandIntRes_LLROUND(N, Lo, Hi); break;
- case ISD::LLRINT: ExpandIntRes_LLRINT(N, Lo, Hi); break;
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_LLRINT:
+ case ISD::LLROUND:
+ case ISD::LLRINT: ExpandIntRes_LLROUND_LLRINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
@@ -1794,6 +1907,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
+
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
@@ -1817,7 +1933,11 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!");
- return ExpandChainLibCall(LC, Node, false);
+ EVT RetVT = Node->getValueType(0);
+ SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end());
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node),
+ Node->getOperand(0));
}
/// N is a shift by a value that needs to be expanded,
@@ -2304,11 +2424,27 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
SDValue Ovf;
- bool HasOpCarry = TLI.isOperationLegalOrCustom(
- N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY,
- TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
+ unsigned CarryOp, NoCarryOp;
+ ISD::CondCode Cond;
+ switch (N->getOpcode()) {
+ case ISD::UADDO:
+ CarryOp = ISD::ADDCARRY;
+ NoCarryOp = ISD::ADD;
+ Cond = ISD::SETULT;
+ break;
+ case ISD::USUBO:
+ CarryOp = ISD::SUBCARRY;
+ NoCarryOp = ISD::SUB;
+ Cond = ISD::SETUGT;
+ break;
+ default:
+ llvm_unreachable("Node has unexpected Opcode");
+ }
- if (HasOpCarry) {
+ bool HasCarryOp = TLI.isOperationLegalOrCustom(
+ CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType()));
+
+ if (HasCarryOp) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
GetExpandedInteger(LHS, LHSL, LHSH);
@@ -2317,22 +2453,19 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
- unsigned Opc = N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
- Hi = DAG.getNode(Opc, dl, VTList, HiOps);
+ Hi = DAG.getNode(CarryOp, dl, VTList, HiOps);
Ovf = Hi.getValue(1);
} else {
// Expand the result by simply replacing it with the equivalent
// non-overflow-checking operation.
- auto Opc = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB;
- SDValue Sum = DAG.getNode(Opc, dl, LHS.getValueType(), LHS, RHS);
+ SDValue Sum = DAG.getNode(NoCarryOp, dl, LHS.getValueType(), LHS, RHS);
SplitInteger(Sum, Lo, Hi);
// Calculate the overflow: addition overflows iff a + b < a, and subtraction
// overflows iff a - b > a.
- auto Cond = N->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT;
Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
}
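
The wrap-around conditions used in the fallback path can be sanity-checked in plain C++ (8-bit values invented for illustration):

    #include <cstdint>
    #include <cstdio>

    // Unsigned addition wrapped iff the sum is below an operand (SETULT);
    // unsigned subtraction wrapped iff the difference exceeds the minuend
    // (SETUGT).
    int main() {
      uint8_t A = 200, B = 100;
      uint8_t Sum = A + B;                         // wraps to 44
      std::printf("add overflow: %d\n", Sum < A);  // 1
      uint8_t Diff = B - A;                        // wraps to 156
      std::printf("sub overflow: %d\n", Diff > B); // 1
    }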
@@ -2544,7 +2677,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT VT = N->getValueType(0);
- SDValue Op = N->getOperand(0);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
Op = GetPromotedFloat(Op);
@@ -2552,8 +2687,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first,
- Lo, Hi);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
+ CallOptions, dl, Chain);
+ SplitInteger(Tmp.first, Lo, Hi);
+
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
@@ -2561,75 +2700,94 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT VT = N->getValueType(0);
- SDValue Op = N->getOperand(0);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
Op = GetPromotedFloat(Op);
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
TargetLowering::MakeLibCallOptions CallOptions;
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first,
- Lo, Hi);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op,
+ CallOptions, dl, Chain);
+ SplitInteger(Tmp.first, Lo, Hi);
+
+ if (IsStrict)
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
-void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
- if (VT == MVT::f32)
- LC = RTLIB::LLROUND_F32;
- else if (VT == MVT::f64)
- LC = RTLIB::LLROUND_F64;
- else if (VT == MVT::f80)
- LC = RTLIB::LLROUND_F80;
- else if (VT == MVT::f128)
- LC = RTLIB::LLROUND_F128;
- else if (VT == MVT::ppcf128)
- LC = RTLIB::LLROUND_PPCF128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
+void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
- SDValue Op = N->getOperand(0);
- if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
- Op = GetPromotedFloat(Op);
+ assert(getTypeAction(Op.getValueType()) != TargetLowering::TypePromoteFloat &&
+ "Input type needs to be promoted!");
- SDLoc dl(N);
- EVT RetVT = N->getValueType(0);
- TargetLowering::MakeLibCallOptions CallOptions;
- CallOptions.setSExt(true);
- SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first,
- Lo, Hi);
-}
+ EVT VT = Op.getValueType();
-void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
- if (VT == MVT::f32)
- LC = RTLIB::LLRINT_F32;
- else if (VT == MVT::f64)
- LC = RTLIB::LLRINT_F64;
- else if (VT == MVT::f80)
- LC = RTLIB::LLRINT_F80;
- else if (VT == MVT::f128)
- LC = RTLIB::LLRINT_F128;
- else if (VT == MVT::ppcf128)
- LC = RTLIB::LLRINT_PPCF128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
-
- SDValue Op = N->getOperand(0);
- if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
- Op = GetPromotedFloat(Op);
+ if (N->getOpcode() == ISD::LLROUND ||
+ N->getOpcode() == ISD::STRICT_LLROUND) {
+ if (VT == MVT::f32)
+ LC = RTLIB::LLROUND_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LLROUND_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LLROUND_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LLROUND_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LLROUND_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
+ } else if (N->getOpcode() == ISD::LLRINT ||
+ N->getOpcode() == ISD::STRICT_LLRINT) {
+ if (VT == MVT::f32)
+ LC = RTLIB::LLRINT_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LLRINT_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LLRINT_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LLRINT_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LLRINT_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
+ } else
+ llvm_unreachable("Unexpected opcode!");
SDLoc dl(N);
EVT RetVT = N->getValueType(0);
+ SDValue Chain = N->isStrictFPOpcode() ? N->getOperand(0) : SDValue();
+
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
- SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first,
- Lo, Hi);
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
+ Op, CallOptions, dl,
+ Chain);
+ SplitInteger(Tmp.first, Lo, Hi);
+
+ if (N->isStrictFPOpcode())
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
}
void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue &Lo, SDValue &Hi) {
+ if (N->isAtomic()) {
+ // Targets typically have a wider compare-and-swap than atomic load, so
+ // expand the oversized load as a CAS against zero, which leaves memory
+ // unchanged but returns the current value.
+ SDLoc dl(N);
+ EVT VT = N->getMemoryVT();
+ SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
+ VT, VTs, N->getOperand(0),
+ N->getOperand(1), Zero, Zero, N->getMemOperand());
+ ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
+ ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
+ return;
+ }
+
if (ISD::isNormalLoad(N)) {
ExpandRes_NormalLoad(N, Lo, Hi);
return;
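
The CAS-as-load identity the comment relies on holds because a compare-and-swap against zero never changes memory yet always returns the current value; a standalone sketch with std::atomic:

    #include <atomic>
    #include <cstdio>

    // cmpxchg(p, 0, 0) stores 0 only when the cell already holds 0, so memory
    // is never visibly modified, while the old value comes back atomically.
    int main() {
      std::atomic<long long> Cell{42};
      long long Expected = 0;
      Cell.compare_exchange_strong(Expected, 0); // fails, loads 42
      std::printf("%lld\n", Expected);           // 42
    }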
@@ -2684,8 +2842,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
@@ -2709,8 +2866,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Alignment, MMOFlags, AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
// Load the rest of the low bits.
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -3068,6 +3224,13 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
}
+void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
+ N->getConstantOperandVal(2), TLI, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
SDValue &Lo, SDValue &Hi) {
SDValue LHS = Node->getOperand(0);
@@ -3596,9 +3759,11 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break;
+ case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break;
+ case ISD::STRICT_UINT_TO_FP:
case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break;
case ISD::SHL:
@@ -3865,17 +4030,37 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
}
SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
- SDValue Op = N->getOperand(0);
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT DstVT = N->getValueType(0);
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
- return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain);
+
+ if (!IsStrict)
+ return Tmp.first;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+ if (N->isAtomic()) {
+ // Targets typically have a wider atomic exchange than atomic store, so
+ // expand the oversized store as a swap whose old value is ignored.
+ SDLoc dl(N);
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
+ N->getMemoryVT(),
+ N->getOperand(0), N->getOperand(2),
+ N->getOperand(1),
+ N->getMemOperand());
+ return Swap.getValue(1);
+ }
if (ISD::isNormalStore(N))
return ExpandOp_NormalStore(N, OpNo);
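
Dually, an atomic exchange whose result is discarded behaves as an atomic store, which is why the block above can lower an oversized atomic store to ATOMIC_SWAP; a sketch only:

    #include <atomic>

    // The returned old value is simply dropped; the net effect is a plain
    // atomic store of V into Cell.
    void atomicStoreViaSwap(std::atomic<long long> &Cell, long long V) {
      (void)Cell.exchange(V);
    }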
@@ -3965,81 +4150,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
}
SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
- SDValue Op = N->getOperand(0);
- EVT SrcVT = Op.getValueType();
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
+ SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT DstVT = N->getValueType(0);
- SDLoc dl(N);
-
- // The following optimization is valid only if every value in SrcVT (when
- // treated as signed) is representable in DstVT. Check that the mantissa
- // size of DstVT is >= than the number of bits in SrcVT -1.
- const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT);
- if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 &&
- TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
- // Do a signed conversion then adjust the result.
- SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
- SignedConv = TLI.LowerOperation(SignedConv, DAG);
-
- // The result of the signed conversion needs adjusting if the 'sign bit' of
- // the incoming integer was set. To handle this, we dynamically test to see
- // if it is set, and, if so, add a fudge factor.
-
- const uint64_t F32TwoE32 = 0x4F800000ULL;
- const uint64_t F32TwoE64 = 0x5F800000ULL;
- const uint64_t F32TwoE128 = 0x7F800000ULL;
-
- APInt FF(32, 0);
- if (SrcVT == MVT::i32)
- FF = APInt(32, F32TwoE32);
- else if (SrcVT == MVT::i64)
- FF = APInt(32, F32TwoE64);
- else if (SrcVT == MVT::i128)
- FF = APInt(32, F32TwoE128);
- else
- llvm_unreachable("Unsupported UINT_TO_FP!");
-
- // Check whether the sign bit is set.
- SDValue Lo, Hi;
- GetExpandedInteger(Op, Lo, Hi);
- SDValue SignSet = DAG.getSetCC(dl,
- getSetCCResultType(Hi.getValueType()),
- Hi,
- DAG.getConstant(0, dl, Hi.getValueType()),
- ISD::SETLT);
-
- // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
- SDValue FudgePtr =
- DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)),
- TLI.getPointerTy(DAG.getDataLayout()));
-
- // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
- SDValue Zero = DAG.getIntPtrConstant(0, dl);
- SDValue Four = DAG.getIntPtrConstant(4, dl);
- if (DAG.getDataLayout().isBigEndian())
- std::swap(Zero, Four);
- SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet,
- Zero, Four);
- unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
- FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(),
- FudgePtr, Offset);
- Alignment = std::min(Alignment, 4u);
-
- // Load the value out, extending it from f32 to the destination float type.
- // FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(
- ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
- Alignment);
- return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
- }
-
- // Otherwise, use a libcall.
- RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+ RTLIB::Libcall LC = RTLIB::getUINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true);
- return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first;
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain);
+
+ if (!IsStrict)
+ return Tmp.first;
+
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ ReplaceValueWith(SDValue(N, 0), Tmp.first);
+ return SDValue();
}
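// The deleted block above implemented UINT_TO_FP as a signed conversion
// plus a runtime-selected "fudge" constant (0x5F800000 encodes 2^64 as
// an f32). A standalone C++ sketch of that trick, outside the DAG:
#include <cstdint>

float UintToFloatViaSigned(uint64_t X) {
  // If the sign bit of X was set, (int64_t)X equals X - 2^64, so the
  // signed conversion is off by exactly 2^64; add it back in.
  float Signed = static_cast<float>(static_cast<int64_t>(X));
  const float TwoE64 = 0x1p64f; // the value the F32TwoE64 constant encodes
  return static_cast<int64_t>(X) < 0 ? Signed + TwoE64 : Signed;
}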
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index b596c174a287..63ddb59fce68 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -204,7 +204,8 @@ bool DAGTypeLegalizer::run() {
// non-leaves.
for (SDNode &Node : DAG.allnodes()) {
if (Node.getNumOperands() == 0) {
- AddToWorklist(&Node);
+ Node.setNodeId(ReadyToProcess);
+ Worklist.push_back(&Node);
} else {
Node.setNodeId(Unanalyzed);
}
@@ -974,68 +975,6 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
}
-/// Convert the node into a libcall with the same prototype.
-SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
- bool isSigned) {
- TargetLowering::MakeLibCallOptions CallOptions;
- CallOptions.setSExt(isSigned);
- unsigned NumOps = N->getNumOperands();
- SDLoc dl(N);
- if (NumOps == 0) {
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions,
- dl).first;
- } else if (NumOps == 1) {
- SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions,
- dl).first;
- } else if (NumOps == 2) {
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions,
- dl).first;
- }
- SmallVector<SDValue, 8> Ops(NumOps);
- for (unsigned i = 0; i < NumOps; ++i)
- Ops[i] = N->getOperand(i);
-
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first;
-}
-
-/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that
-/// the first operand is the in-chain.
-std::pair<SDValue, SDValue>
-DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node,
- bool isSigned) {
- SDValue InChain = Node->getOperand(0);
-
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
- EVT ArgVT = Node->getOperand(i).getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Node->getOperand(i);
- Entry.Ty = ArgTy;
- Entry.IsSExt = isSigned;
- Entry.IsZExt = !isSigned;
- Args.push_back(Entry);
- }
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy(DAG.getDataLayout()));
-
- Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node))
- .setChain(InChain)
- .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
- std::move(Args))
- .setSExtResult(isSigned)
- .setZExtResult(!isSigned);
-
- std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
- return CallInfo;
-}
-
/// Promote the given target boolean to a target boolean of the given type.
/// A target boolean is an integer value, not necessarily of type i1, the bits
/// of which conform to getBooleanContents.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 4afbae69128a..faae14444d51 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -215,10 +215,7 @@ private:
SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue JoinIntegers(SDValue Lo, SDValue Hi);
- SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
- std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
- SDNode *Node, bool isSigned);
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
@@ -228,11 +225,6 @@ private:
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
SDValue &Lo, SDValue &Hi);
- void AddToWorklist(SDNode *N) {
- N->setNodeId(ReadyToProcess);
- Worklist.push_back(N);
- }
-
//===--------------------------------------------------------------------===//
// Integer Promotion Support: LegalizeIntegerTypes.cpp
//===--------------------------------------------------------------------===//
@@ -337,6 +329,7 @@ private:
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
SDValue PromoteIntRes_MULFIX(SDNode *N);
+ SDValue PromoteIntRes_DIVFIX(SDNode *N);
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
SDValue PromoteIntRes_VECREDUCE(SDNode *N);
SDValue PromoteIntRes_ABS(SDNode *N);
@@ -362,9 +355,11 @@ private:
SDValue PromoteIntOp_Shift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_TRUNCATE(SDNode *N);
SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
+ SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
@@ -373,7 +368,7 @@ private:
SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_MULFIX(SDNode *N);
+ SDValue PromoteIntOp_FIX(SDNode *N);
SDValue PromoteIntOp_FPOWI(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
@@ -411,8 +406,7 @@ private:
void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_LLROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LLROUND_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -435,6 +429,7 @@ private:
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_DIVFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -486,6 +481,8 @@ private:
// Convert Float Results to Integer.
void SoftenFloatResult(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);
+ SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
@@ -495,6 +492,7 @@ private:
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
+ SDValue SoftenFloatRes_FCBRT(SDNode *N);
SDValue SoftenFloatRes_FCEIL(SDNode *N);
SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
SDValue SoftenFloatRes_FCOS(SDNode *N);
@@ -530,9 +528,9 @@ private:
// Convert Float Operand to Integer.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
- SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
SDValue SoftenFloatOp_LROUND(SDNode *N);
@@ -542,6 +540,7 @@ private:
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
+ SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
//===--------------------------------------------------------------------===//
// Float Expansion Support: LegalizeFloatTypes.cpp
@@ -559,10 +558,15 @@ private:
// Float Result Expansion.
void ExpandFloatResult(SDNode *N, unsigned ResNo);
void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC,
+ SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
+ SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FCBRT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -676,7 +680,6 @@ private:
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
- SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
@@ -688,7 +691,7 @@ private:
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
- SDValue ScalarizeVecRes_MULFIX(SDNode *N);
+ SDValue ScalarizeVecRes_FIX(SDNode *N);
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -730,7 +733,7 @@ private:
void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
- void SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -804,6 +807,7 @@ private:
SDValue WidenVSELECTAndMask(SDNode *N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
+ SDValue WidenVecRes_STRICT_FSETCC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
@@ -833,6 +837,7 @@ private:
SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
+ SDValue WidenVecOp_STRICT_FSETCC(SDNode* N);
SDValue WidenVecOp_VSELECT(SDNode *N);
SDValue WidenVecOp_Convert(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 5562f400b6e1..c45c62cabc05 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -169,9 +169,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(IncrementSize, dl,
- StackPtr.getValueType()));
+ StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
@@ -248,6 +246,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
SDLoc dl(N);
LoadSDNode *LD = cast<LoadSDNode>(N);
+ assert(!LD->isAtomic() && "Atomics can not be split");
EVT ValueVT = LD->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
SDValue Chain = LD->getChain();
@@ -262,8 +261,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl);
Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
MinAlign(Alignment, IncrementSize),
@@ -459,6 +457,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
SDLoc dl(N);
StoreSDNode *St = cast<StoreSDNode>(N);
+ assert(!St->isAtomic() && "Atomics can not be split");
EVT ValueVT = St->getValue().getValueType();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
SDValue Chain = St->getChain();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 15c3a0b6cfad..7d0b1ee6ae07 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -75,95 +75,95 @@ class VectorLegalizer {
SDValue LegalizeOp(SDValue Op);
/// Assuming the node is legal, "legalize" the results.
- SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+ SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
+
+ /// Make sure Results are legal and update the translation cache.
+ SDValue RecursivelyLegalizeResults(SDValue Op,
+ MutableArrayRef<SDValue> Results);
+
+ /// Wrapper to interface LowerOperation with a vector of Results.
+ /// Returns false if the target wants to use default expansion. Otherwise
+ /// returns true. If the return is true and the Results are empty, then the
+ /// target wants to keep the input node as is.
+ bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
/// Implements unrolling a VSETCC.
- SDValue UnrollVSETCC(SDValue Op);
+ SDValue UnrollVSETCC(SDNode *Node);
/// Implement expand-based legalization of vector operations.
///
/// This is just a high-level routine to dispatch to specific code paths for
/// operations to legalize them.
- SDValue Expand(SDValue Op);
+ void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
/// FP_TO_SINT isn't legal.
- SDValue ExpandFP_TO_UINT(SDValue Op);
+ void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
/// SINT_TO_FLOAT and SHR on vectors isn't legal.
- SDValue ExpandUINT_TO_FLOAT(SDValue Op);
+ void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
- SDValue ExpandSEXTINREG(SDValue Op);
+ SDValue ExpandSEXTINREG(SDNode *Node);
/// Implement expansion for ANY_EXTEND_VECTOR_INREG.
///
/// Shuffles the low lanes of the operand into place and bitcasts to the proper
/// type. The contents of the bits in the extended part of each element are
/// undef.
- SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
+ SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
/// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
///
/// Shuffles the low lanes of the operand into place, bitcasts to the proper
/// type, then shifts left and arithmetic shifts right to introduce a sign
/// extension.
- SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
+ SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
/// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
///
/// Shuffles the low lanes of the operand into place and blends zeros into
/// the remaining lanes, finally bitcasting to the proper type.
- SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
-
- /// Implement expand-based legalization of ABS vector operations.
- /// If the following expansion is legal/custom then do it:
- /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1))
- /// else unroll the operation.
- SDValue ExpandABS(SDValue Op);
+ SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
/// Expand bswap of vectors into a shuffle if legal.
- SDValue ExpandBSWAP(SDValue Op);
+ SDValue ExpandBSWAP(SDNode *Node);
/// Implement vselect in terms of XOR, AND, OR when blend is not
/// supported by the target.
- SDValue ExpandVSELECT(SDValue Op);
- SDValue ExpandSELECT(SDValue Op);
- SDValue ExpandLoad(SDValue Op);
- SDValue ExpandStore(SDValue Op);
- SDValue ExpandFNEG(SDValue Op);
- SDValue ExpandFSUB(SDValue Op);
- SDValue ExpandBITREVERSE(SDValue Op);
- SDValue ExpandCTPOP(SDValue Op);
- SDValue ExpandCTLZ(SDValue Op);
- SDValue ExpandCTTZ(SDValue Op);
- SDValue ExpandFunnelShift(SDValue Op);
- SDValue ExpandROT(SDValue Op);
- SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
- SDValue ExpandUADDSUBO(SDValue Op);
- SDValue ExpandSADDSUBO(SDValue Op);
- SDValue ExpandMULO(SDValue Op);
- SDValue ExpandAddSubSat(SDValue Op);
- SDValue ExpandFixedPointMul(SDValue Op);
- SDValue ExpandStrictFPOp(SDValue Op);
+ SDValue ExpandVSELECT(SDNode *Node);
+ SDValue ExpandSELECT(SDNode *Node);
+ std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
+ SDValue ExpandStore(SDNode *N);
+ SDValue ExpandFNEG(SDNode *Node);
+ void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ SDValue ExpandFixedPointDiv(SDNode *Node);
+ SDValue ExpandStrictFPOp(SDNode *Node);
+ void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+ void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implements vector promotion.
///
/// This is essentially just bitcasting the operands to a different type and
/// bitcasting the result back to the original type.
- SDValue Promote(SDValue Op);
+ void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implements [SU]INT_TO_FP vector promotion.
///
/// This is a [zs]ext of the input operand to a larger integer type.
- SDValue PromoteINT_TO_FP(SDValue Op);
+ void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
/// Implements FP_TO_[SU]INT vector promotion of the result type.
///
/// It is promoted to a larger integer type. The result is then
/// truncated back to the original type.
- SDValue PromoteFP_TO_INT(SDValue Op);
+ void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
public:
VectorLegalizer(SelectionDAG& dag) :
@@ -219,11 +219,27 @@ bool VectorLegalizer::Run() {
return Changed;
}
-SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
+ assert(Op->getNumValues() == Result->getNumValues() &&
+ "Unexpected number of results");
// Generic legalization: just pass the operand through.
- for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
- AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
- return Result.getValue(Op.getResNo());
+ for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
+ AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
+ return SDValue(Result, Op.getResNo());
+}
+
+SDValue
+VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
+ MutableArrayRef<SDValue> Results) {
+ assert(Results.size() == Op->getNumValues() &&
+ "Unexpected number of results");
+ // Make sure that the generated code is itself legal.
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+ Results[i] = LegalizeOp(Results[i]);
+ AddLegalizedOperand(Op.getValue(i), Results[i]);
+ }
+
+ return Results[Op.getResNo()];
}
SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
@@ -232,18 +248,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
if (I != LegalizedNodes.end()) return I->second;
- SDNode* Node = Op.getNode();
-
// Legalize the operands
SmallVector<SDValue, 8> Ops;
- for (const SDValue &Op : Node->op_values())
- Ops.push_back(LegalizeOp(Op));
+ for (const SDValue &Oper : Op->op_values())
+ Ops.push_back(LegalizeOp(Oper));
- SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
- Op.getResNo());
+ SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);
if (Op.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ LoadSDNode *LD = cast<LoadSDNode>(Node);
ISD::LoadExtType ExtType = LD->getExtensionType();
if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
@@ -252,26 +265,29 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
LD->getMemoryVT())) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
- return TranslateLegalizeResults(Op, Result);
- case TargetLowering::Custom:
- if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
- assert(Lowered->getNumValues() == Op->getNumValues() &&
- "Unexpected number of results");
- if (Lowered != Result) {
- // Make sure the new code is also legal.
- Lowered = LegalizeOp(Lowered);
- Changed = true;
- }
- return TranslateLegalizeResults(Op, Lowered);
+ return TranslateLegalizeResults(Op, Node);
+ case TargetLowering::Custom: {
+ SmallVector<SDValue, 2> ResultVals;
+ if (LowerOperationWrapper(Node, ResultVals)) {
+ if (ResultVals.empty())
+ return TranslateLegalizeResults(Op, Node);
+
+ Changed = true;
+ return RecursivelyLegalizeResults(Op, ResultVals);
}
LLVM_FALLTHROUGH;
- case TargetLowering::Expand:
+ }
+ case TargetLowering::Expand: {
Changed = true;
- return ExpandLoad(Op);
+ std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
+ AddLegalizedOperand(Op.getValue(0), Tmp.first);
+ AddLegalizedOperand(Op.getValue(1), Tmp.second);
+ return Op.getResNo() ? Tmp.first : Tmp.second;
+ }
}
}
} else if (Op.getOpcode() == ISD::STORE) {
- StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
EVT StVT = ST->getMemoryVT();
MVT ValVT = ST->getValue().getSimpleValueType();
if (StVT.isVector() && ST->isTruncatingStore()) {
@@ -280,19 +296,24 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
switch (TLI.getTruncStoreAction(ValVT, StVT)) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
- return TranslateLegalizeResults(Op, Result);
+ return TranslateLegalizeResults(Op, Node);
case TargetLowering::Custom: {
- SDValue Lowered = TLI.LowerOperation(Result, DAG);
- if (Lowered != Result) {
- // Make sure the new code is also legal.
- Lowered = LegalizeOp(Lowered);
+ SmallVector<SDValue, 1> ResultVals;
+ if (LowerOperationWrapper(Node, ResultVals)) {
+ if (ResultVals.empty())
+ return TranslateLegalizeResults(Op, Node);
+
Changed = true;
+ return RecursivelyLegalizeResults(Op, ResultVals);
}
- return TranslateLegalizeResults(Op, Lowered);
+ LLVM_FALLTHROUGH;
}
- case TargetLowering::Expand:
+ case TargetLowering::Expand: {
Changed = true;
- return ExpandStore(Op);
+ SDValue Chain = ExpandStore(Node);
+ AddLegalizedOperand(Op, Chain);
+ return Chain;
+ }
}
}
}
@@ -300,55 +321,41 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
bool HasVectorValueOrOp = false;
for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J)
HasVectorValueOrOp |= J->isVector();
- for (const SDValue &Op : Node->op_values())
- HasVectorValueOrOp |= Op.getValueType().isVector();
+ for (const SDValue &Oper : Node->op_values())
+ HasVectorValueOrOp |= Oper.getValueType().isVector();
if (!HasVectorValueOrOp)
- return TranslateLegalizeResults(Op, Result);
+ return TranslateLegalizeResults(Op, Node);
TargetLowering::LegalizeAction Action = TargetLowering::Legal;
+ EVT ValVT;
switch (Op.getOpcode()) {
default:
- return TranslateLegalizeResults(Op, Result);
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- case ISD::STRICT_FP_ROUND:
- case ISD::STRICT_FP_EXTEND:
+ return TranslateLegalizeResults(Op, Node);
+ case ISD::MERGE_VALUES:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ // This operation lies about being legal: when it claims to be legal,
+ // it should actually be expanded.
+ if (Action == TargetLowering::Legal)
+ Action = TargetLowering::Expand;
+ break;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
+ ValVT = Node->getValueType(0);
+ if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
+ ValVT = Node->getOperand(1).getValueType();
+ Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
// If we're asked to expand a strict vector floating-point operation,
// by default we're going to simply unroll it. That is usually the
// best approach, except in the case where the resulting strict (scalar)
// operations would themselves use the fallback mutation to non-strict.
// In that specific case, just do the fallback on the vector op.
- if (Action == TargetLowering::Expand &&
- TLI.getStrictFPOperationAction(Node->getOpcode(),
- Node->getValueType(0))
- == TargetLowering::Legal) {
- EVT EltVT = Node->getValueType(0).getVectorElementType();
+ if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
+ TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
+ TargetLowering::Legal) {
+ EVT EltVT = ValVT.getVectorElementType();
if (TLI.getOperationAction(Node->getOpcode(), EltVT)
== TargetLowering::Expand &&
TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
@@ -454,7 +461,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
- case ISD::UMULFIXSAT: {
+ case ISD::UMULFIXSAT:
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -482,53 +491,90 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
+ SmallVector<SDValue, 8> ResultVals;
switch (Action) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Promote:
- Result = Promote(Op);
- Changed = true;
+ LLVM_DEBUG(dbgs() << "Promoting\n");
+ Promote(Node, ResultVals);
+ assert(!ResultVals.empty() && "No results for promotion?");
break;
case TargetLowering::Legal:
LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
break;
- case TargetLowering::Custom: {
+ case TargetLowering::Custom:
LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
- if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
- LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
- Result = Tmp1;
+ if (LowerOperationWrapper(Node, ResultVals))
break;
- }
LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
LLVM_FALLTHROUGH;
- }
case TargetLowering::Expand:
- Result = Expand(Op);
+ LLVM_DEBUG(dbgs() << "Expanding\n");
+ Expand(Node, ResultVals);
+ break;
}
- // Make sure that the generated code is itself legal.
- if (Result != Op) {
- Result = LegalizeOp(Result);
- Changed = true;
+ if (ResultVals.empty())
+ return TranslateLegalizeResults(Op, Node);
+
+ Changed = true;
+ return RecursivelyLegalizeResults(Op, ResultVals);
+}
+
+// FIXME: This is very similar to the X86 override of
+// TargetLowering::LowerOperationWrapper. Can we merge them somehow?
+bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+
+ if (!Res.getNode())
+ return false;
+
+ if (Res == SDValue(Node, 0))
+ return true;
+
+ // If the original node has one result, take the return value from
+ // LowerOperation as is. It might not be result number 0.
+ if (Node->getNumValues() == 1) {
+ Results.push_back(Res);
+ return true;
}
- // Note that LegalizeOp may be reentered even from single-use nodes, which
- // means that we always must cache transformed nodes.
- AddLegalizedOperand(Op, Result);
- return Result;
+ // If the original node has multiple results, then the return node should
+ // have the same number of results.
+ assert((Node->getNumValues() == Res->getNumValues()) &&
+ "Lowering returned the wrong number of results!");
+
+ // Place the new result values based on the node's result number.
+ for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
+ Results.push_back(Res.getValue(I));
+
+ return true;
}
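// A toy restatement of that tri-state contract (hypothetical types, not
// the LLVM API): a null return means "use default expansion", the input
// node itself means "keep the node as is", and anything else supplies
// one replacement value per original result.
#include <vector>

struct ToyNode { int NumValues = 1; };
ToyNode *LowerOperation(ToyNode *N); // stand-in for TLI.LowerOperation

bool ToyLowerOperationWrapper(ToyNode *N, std::vector<ToyNode *> &Results) {
  ToyNode *Res = LowerOperation(N);
  if (!Res)
    return false; // target declined: fall back to default expansion
  if (Res == N)
    return true;  // target wants the input node kept unchanged
  for (int I = 0; I != N->NumValues; ++I)
    Results.push_back(Res); // one entry per original result value
  return true;
}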
-SDValue VectorLegalizer::Promote(SDValue Op) {
+void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// For a few operations there is a specific concept for promotion based on
// the operand's type.
- switch (Op.getOpcode()) {
+ switch (Node->getOpcode()) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
// "Promote" the operation by extending the operand.
- return PromoteINT_TO_FP(Op);
+ PromoteINT_TO_FP(Node, Results);
+ return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
// Promote the operation by extending the operand.
- return PromoteFP_TO_INT(Op);
+ PromoteFP_TO_INT(Node, Results);
+ return;
+ case ISD::FP_ROUND:
+ case ISD::FP_EXTEND:
+ // These operations are used to do promotion so they can't be promoted
+ // themselves.
+ llvm_unreachable("Don't know how to promote this operation!");
}
// There are currently two cases of vector promotion:
@@ -536,91 +582,128 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
// same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
// 2) Extending a vector of floats to a vector of the same number of larger
// floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
- MVT VT = Op.getSimpleValueType();
- assert(Op.getNode()->getNumValues() == 1 &&
+ assert(Node->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
- MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
- SDLoc dl(Op);
- SmallVector<SDValue, 4> Operands(Op.getNumOperands());
-
- for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
- if (Op.getOperand(j).getValueType().isVector())
- if (Op.getOperand(j)
+ MVT VT = Node->getSimpleValueType(0);
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ SDLoc dl(Node);
+ SmallVector<SDValue, 4> Operands(Node->getNumOperands());
+
+ for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
+ if (Node->getOperand(j).getValueType().isVector())
+ if (Node->getOperand(j)
.getValueType()
.getVectorElementType()
.isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
- Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
+ Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
else
- Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
else
- Operands[j] = Op.getOperand(j);
+ Operands[j] = Node->getOperand(j);
}
- Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
+ SDValue Res =
+ DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());
+
if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
(VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
- return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
+ Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, DAG.getIntPtrConstant(0, dl));
else
- return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+ Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
+
+ Results.push_back(Res);
}
-SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
+void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
// INT_TO_FP operations may require the input operand be promoted even
// when the type is otherwise legal.
- MVT VT = Op.getOperand(0).getSimpleValueType();
- MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+ bool IsStrict = Node->isStrictFPOpcode();
+ MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
"Vectors have different number of elements!");
- SDLoc dl(Op);
- SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+ SDLoc dl(Node);
+ SmallVector<SDValue, 4> Operands(Node->getNumOperands());
- unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
- ISD::SIGN_EXTEND;
- for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
- if (Op.getOperand(j).getValueType().isVector())
- Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
+ unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
+ Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
+ ? ISD::ZERO_EXTEND
+ : ISD::SIGN_EXTEND;
+ for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
+ if (Node->getOperand(j).getValueType().isVector())
+ Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
else
- Operands[j] = Op.getOperand(j);
+ Operands[j] = Node->getOperand(j);
+ }
+
+ if (IsStrict) {
+ SDValue Res = DAG.getNode(Node->getOpcode(), dl,
+ {Node->getValueType(0), MVT::Other}, Operands);
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ return;
}
- return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
+ SDValue Res =
+ DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
+ Results.push_back(Res);
}
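// A scalar C++ analogue of this promotion, assuming a target where the
// narrow conversion is unavailable: widen the operand (zext for the
// unsigned flavor, sext for the signed one) and convert at the wider
// type; the result is identical because the widening step is exact.
#include <cstdint>

float U16ToFloat(uint16_t V) {
  uint32_t Wide = V;               // ZERO_EXTEND for [STRICT_]UINT_TO_FP
  return static_cast<float>(Wide); // convert at the promoted type
}

float S16ToFloat(int16_t V) {
  int32_t Wide = V;                // SIGN_EXTEND for [STRICT_]SINT_TO_FP
  return static_cast<float>(Wide);
}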
// For FP_TO_INT we promote the result type to a vector type with wider
// elements and then truncate the result. This is different from the default
// PromoteVector, which uses bitcast to promote, thus assuming that the
// promoted vector type has the same overall size.
-SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
- MVT VT = Op.getSimpleValueType();
- MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT VT = Node->getSimpleValueType(0);
+ MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+ bool IsStrict = Node->isStrictFPOpcode();
assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
"Vectors have different number of elements!");
- unsigned NewOpc = Op->getOpcode();
+ unsigned NewOpc = Node->getOpcode();
// Change FP_TO_UINT to FP_TO_SINT if possible.
// TODO: Should we only do this if FP_TO_UINT itself isn't legal?
if (NewOpc == ISD::FP_TO_UINT &&
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
NewOpc = ISD::FP_TO_SINT;
- SDLoc dl(Op);
- SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
+ if (NewOpc == ISD::STRICT_FP_TO_UINT &&
+ TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
+ NewOpc = ISD::STRICT_FP_TO_SINT;
+
+ SDLoc dl(Node);
+ SDValue Promoted, Chain;
+ if (IsStrict) {
+ Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
+ {Node->getOperand(0), Node->getOperand(1)});
+ Chain = Promoted.getValue(1);
+ } else
+ Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
// original operation was undefined anyway, so the assert is still correct.
- Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
- : ISD::AssertSext,
- dl, NVT, Promoted,
+ if (Node->getOpcode() == ISD::FP_TO_UINT ||
+ Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
+ NewOpc = ISD::AssertZext;
+ else
+ NewOpc = ISD::AssertSext;
+
+ Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
DAG.getValueType(VT.getScalarType()));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
+ Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
+ Results.push_back(Promoted);
+ if (IsStrict)
+ Results.push_back(Chain);
}
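// A scalar sketch of the widen-then-truncate idea (plain C++, with a
// hypothetical i8 result type): inputs that do not fit the narrow type
// were already undefined for the original node, which is what makes the
// AssertZext/AssertSext + TRUNCATE sequence sound.
#include <cstdint>

int8_t FPToS8ViaS32(float F) {
  int32_t Wide = static_cast<int32_t>(F); // FP_TO_SINT at the wider type
  // Out-of-range inputs were UB for the i8 conversion anyway, so the
  // value can be asserted in range (AssertSext) and truncated.
  return static_cast<int8_t>(Wide);       // TRUNCATE back to i8
}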
-SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
- LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
EVT SrcVT = LD->getMemoryVT();
EVT SrcEltVT = SrcVT.getScalarType();
@@ -629,7 +712,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
SDValue NewChain;
SDValue Value;
if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
- SDLoc dl(Op);
+ SDLoc dl(N);
SmallVector<SDValue, 8> Vals;
SmallVector<SDValue, 8> LoadChains;
@@ -741,130 +824,157 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
}
NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
+ Value = DAG.getBuildVector(N->getValueType(0), dl, Vals);
} else {
- SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
- // Skip past MERGE_VALUE node if known.
- if (Scalarized->getOpcode() == ISD::MERGE_VALUES) {
- NewChain = Scalarized.getOperand(1);
- Value = Scalarized.getOperand(0);
- } else {
- NewChain = Scalarized.getValue(1);
- Value = Scalarized.getValue(0);
- }
+ std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
}
- AddLegalizedOperand(Op.getValue(0), Value);
- AddLegalizedOperand(Op.getValue(1), NewChain);
-
- return (Op.getResNo() ? NewChain : Value);
+ return std::make_pair(Value, NewChain);
}
-SDValue VectorLegalizer::ExpandStore(SDValue Op) {
- StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+SDValue VectorLegalizer::ExpandStore(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
- AddLegalizedOperand(Op, TF);
return TF;
}
-SDValue VectorLegalizer::Expand(SDValue Op) {
- switch (Op->getOpcode()) {
+void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
+ SDValue Tmp;
+ switch (Node->getOpcode()) {
+ case ISD::MERGE_VALUES:
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ Results.push_back(Node->getOperand(i));
+ return;
case ISD::SIGN_EXTEND_INREG:
- return ExpandSEXTINREG(Op);
+ Results.push_back(ExpandSEXTINREG(Node));
+ return;
case ISD::ANY_EXTEND_VECTOR_INREG:
- return ExpandANY_EXTEND_VECTOR_INREG(Op);
+ Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
+ return;
case ISD::SIGN_EXTEND_VECTOR_INREG:
- return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
+ Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
+ return;
case ISD::ZERO_EXTEND_VECTOR_INREG:
- return ExpandZERO_EXTEND_VECTOR_INREG(Op);
+ Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
+ return;
case ISD::BSWAP:
- return ExpandBSWAP(Op);
+ Results.push_back(ExpandBSWAP(Node));
+ return;
case ISD::VSELECT:
- return ExpandVSELECT(Op);
+ Results.push_back(ExpandVSELECT(Node));
+ return;
case ISD::SELECT:
- return ExpandSELECT(Op);
+ Results.push_back(ExpandSELECT(Node));
+ return;
case ISD::FP_TO_UINT:
- return ExpandFP_TO_UINT(Op);
+ ExpandFP_TO_UINT(Node, Results);
+ return;
case ISD::UINT_TO_FP:
- return ExpandUINT_TO_FLOAT(Op);
+ ExpandUINT_TO_FLOAT(Node, Results);
+ return;
case ISD::FNEG:
- return ExpandFNEG(Op);
+ Results.push_back(ExpandFNEG(Node));
+ return;
case ISD::FSUB:
- return ExpandFSUB(Op);
+ ExpandFSUB(Node, Results);
+ return;
case ISD::SETCC:
- return UnrollVSETCC(Op);
+ Results.push_back(UnrollVSETCC(Node));
+ return;
case ISD::ABS:
- return ExpandABS(Op);
+ if (TLI.expandABS(Node, Tmp, DAG)) {
+ Results.push_back(Tmp);
+ return;
+ }
+ break;
case ISD::BITREVERSE:
- return ExpandBITREVERSE(Op);
+ ExpandBITREVERSE(Node, Results);
+ return;
case ISD::CTPOP:
- return ExpandCTPOP(Op);
+ if (TLI.expandCTPOP(Node, Tmp, DAG)) {
+ Results.push_back(Tmp);
+ return;
+ }
+ break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- return ExpandCTLZ(Op);
+ if (TLI.expandCTLZ(Node, Tmp, DAG)) {
+ Results.push_back(Tmp);
+ return;
+ }
+ break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- return ExpandCTTZ(Op);
+ if (TLI.expandCTTZ(Node, Tmp, DAG)) {
+ Results.push_back(Tmp);
+ return;
+ }
+ break;
case ISD::FSHL:
case ISD::FSHR:
- return ExpandFunnelShift(Op);
+ if (TLI.expandFunnelShift(Node, Tmp, DAG)) {
+ Results.push_back(Tmp);
+ return;
+ }
+ break;
case ISD::ROTL:
case ISD::ROTR:
- return ExpandROT(Op);
+ if (TLI.expandROT(Node, Tmp, DAG)) {
+ Results.push_back(Tmp);
+ return;
+ }
+ break;
case ISD::FMINNUM:
case ISD::FMAXNUM:
- return ExpandFMINNUM_FMAXNUM(Op);
+ if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::UADDO:
case ISD::USUBO:
- return ExpandUADDSUBO(Op);
+ ExpandUADDSUBO(Node, Results);
+ return;
case ISD::SADDO:
case ISD::SSUBO:
- return ExpandSADDSUBO(Op);
+ ExpandSADDSUBO(Node, Results);
+ return;
case ISD::UMULO:
case ISD::SMULO:
- return ExpandMULO(Op);
+ ExpandMULO(Node, Results);
+ return;
case ISD::USUBSAT:
case ISD::SSUBSAT:
case ISD::UADDSAT:
case ISD::SADDSAT:
- return ExpandAddSubSat(Op);
+ if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::SMULFIX:
case ISD::UMULFIX:
- return ExpandFixedPointMul(Op);
+ if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::SMULFIXSAT:
case ISD::UMULFIXSAT:
// FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
// why. Maybe it results in worse codegen compared to the unroll for some
// targets? This should probably be investigated. And if we still prefer to
// unroll, an explanation could be helpful.
- return DAG.UnrollVectorOp(Op.getNode());
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- return ExpandStrictFPOp(Op);
+ break;
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX:
+ Results.push_back(ExpandFixedPointDiv(Node));
+ return;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
+ ExpandStrictFPOp(Node, Results);
+ return;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
@@ -878,22 +988,23 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
- return TLI.expandVecReduce(Op.getNode(), DAG);
- default:
- return DAG.UnrollVectorOp(Op.getNode());
+ Results.push_back(TLI.expandVecReduce(Node, DAG));
+ return;
}
+
+ Results.push_back(DAG.UnrollVectorOp(Node));
}
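// The #include "llvm/IR/ConstrainedOps.def" lines above are the X-macro
// pattern: the .def file invokes INSTRUCTION(...) once per constrained
// op, so a single include stamps out every STRICT_* case label. A
// self-contained sketch with toy opcodes (not the real .def contents):
#define TOY_CONSTRAINED_OPS \
  INSTRUCTION(FADD)         \
  INSTRUCTION(FSUB)         \
  INSTRUCTION(FMUL)

enum ToyOpcode { STRICT_FADD, STRICT_FSUB, STRICT_FMUL, ADD };

bool isToyStrictFPOpcode(ToyOpcode Op) {
  switch (Op) {
#define INSTRUCTION(DAGN) case STRICT_##DAGN:
  TOY_CONSTRAINED_OPS
#undef INSTRUCTION
    return true; // every stamped-out label falls through to here
  default:
    return false;
  }
}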
-SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
+SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
// Lower a select instruction where the condition is a scalar and the
// operands are vectors. Lower this select to VSELECT and implement it
// using XOR AND OR. The selector bit is broadcasted.
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
+ EVT VT = Node->getValueType(0);
+ SDLoc DL(Node);
- SDValue Mask = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- SDValue Op2 = Op.getOperand(2);
+ SDValue Mask = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
assert(VT.isVector() && !Mask.getValueType().isVector()
&& Op1.getValueType() == Op2.getValueType() && "Invalid type");
@@ -907,7 +1018,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(Node);
// Generate a mask operand.
EVT MaskTy = VT.changeVectorElementTypeToInteger();
@@ -936,36 +1047,35 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
- return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
+ return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}
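// A scalar illustration of that XOR/AND/OR lowering (standalone C++):
// the scalar condition is broadcast to an all-ones or all-zeros mask,
// after which the select is pure bitwise arithmetic.
#include <cstdint>

uint32_t SelectViaMask(bool Cond, uint32_t Op1, uint32_t Op2) {
  uint32_t Mask = Cond ? ~0u : 0u;       // broadcast the selector bit
  uint32_t NotMask = ~Mask;              // the XOR with all-ones
  return (Op1 & Mask) | (Op2 & NotMask); // AND, AND, OR
}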
-SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
- EVT VT = Op.getValueType();
+SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
// Make sure that the SRA and SHL instructions are available.
if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(Node);
- SDLoc DL(Op);
- EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
+ SDLoc DL(Node);
+ EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();
unsigned BW = VT.getScalarSizeInBits();
unsigned OrigBW = OrigTy.getScalarSizeInBits();
SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
- Op = Op.getOperand(0);
- Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
+ SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
}
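// The SHL/SRA pair above is the classic sign-extend-in-register idiom.
// A scalar version (C++; the left shift is done on an unsigned value to
// avoid undefined behavior, and >> on int32_t is arithmetic on all
// mainstream compilers):
#include <cstdint>

int32_t SignExtendInReg(uint32_t V, unsigned OrigBW) {
  unsigned Shift = 32 - OrigBW;                     // BW - OrigBW
  return static_cast<int32_t>(V << Shift) >> Shift; // SHL then SRA
}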
// Generically expand a vector anyext in register to a shuffle of the relevant
// lanes into the appropriate locations, with other lanes left undef.
-SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
+SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
int NumElements = VT.getVectorNumElements();
- SDValue Src = Op.getOperand(0);
+ SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
int NumSrcElements = SrcVT.getVectorNumElements();
@@ -997,15 +1107,15 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
}
-SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue Src = Op.getOperand(0);
+SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
// First build an any-extend node which can be legalized above when we
// recurse through it.
- Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
// Now we need sign extend. Do this by shifting the elements. Even if these
// aren't legal operations, they have a better chance of being legalized
@@ -1021,11 +1131,11 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
// Generically expand a vector zext in register to a shuffle of the relevant
// lanes into the appropriate locations, a blend of zero into the high bits,
// and a bitcast to the wider element type.
-SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
+SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
int NumElements = VT.getVectorNumElements();
- SDValue Src = Op.getOperand(0);
+ SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
int NumSrcElements = SrcVT.getVectorNumElements();
@@ -1068,8 +1178,8 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
}
-SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
- EVT VT = Op.getValueType();
+SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
// Generate a byte wise shuffle mask for the BSWAP.
SmallVector<int, 16> ShuffleMask;
@@ -1078,20 +1188,24 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
// Only emit a shuffle if the mask is legal.
if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(Node);
- SDLoc DL(Op);
- Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+ SDLoc DL(Node);
+ SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
return DAG.getNode(ISD::BITCAST, DL, VT, Op);
}
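// A scalar picture of what the BSWAP shuffle computes (standalone C++):
// view the value as bytes and permute them with the reversed-index mask
// that createBSWAPShuffleMask builds; no shifts are involved.
#include <cstdint>
#include <cstring>

uint32_t BSwapViaShuffle(uint32_t V) {
  unsigned char In[4], Out[4];
  std::memcpy(In, &V, 4);
  for (int I = 0; I != 4; ++I)
    Out[I] = In[3 - I]; // the per-byte shuffle mask
  uint32_t R;
  std::memcpy(&R, Out, 4);
  return R;
}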
-SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
- EVT VT = Op.getValueType();
+void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT VT = Node->getValueType(0);
// If we have the scalar operation, it's probably cheaper to unroll it.
- if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
- return DAG.UnrollVectorOp(Op.getNode());
+ if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
+ SDValue Tmp = DAG.UnrollVectorOp(Node);
+ Results.push_back(Tmp);
+ return;
+ }
// If the vector element width is a whole number of bytes, test if its legal
// to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
@@ -1108,35 +1222,39 @@ SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
- SDLoc DL(Op);
- Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+ SDLoc DL(Node);
+ SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
BSWAPMask);
Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ Results.push_back(Op);
+ return;
}
}
// If we have the appropriate vector bit operations, it is better to use them
// than unrolling and expanding each component.
- if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
- !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
- !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
- return DAG.UnrollVectorOp(Op.getNode());
-
- // Let LegalizeDAG handle this later.
- return Op;
+ if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
+ // Let LegalizeDAG handle this later.
+ return;
+
+ // Otherwise unroll.
+ SDValue Tmp = DAG.UnrollVectorOp(Node);
+ Results.push_back(Tmp);
}
-SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
+SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
- SDLoc DL(Op);
+ SDLoc DL(Node);
- SDValue Mask = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- SDValue Op2 = Op.getOperand(2);
+ SDValue Mask = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
EVT VT = Mask.getValueType();
@@ -1152,13 +1270,13 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
TLI.getBooleanContents(Op1.getValueType()) !=
TargetLowering::ZeroOrNegativeOneBooleanContent)
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(Node);
// If the mask and the type are different sizes, unroll the vector op. This
// can occur when getSetCCResultType returns something that is different in
// size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
if (VT.getSizeInBits() != Op1.getValueSizeInBits())
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(Node);
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
@@ -1173,46 +1291,61 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
- return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
-}
-
-SDValue VectorLegalizer::ExpandABS(SDValue Op) {
- // Attempt to expand using TargetLowering.
- SDValue Result;
- if (TLI.expandABS(Op.getNode(), Result, DAG))
- return Result;
-
- // Otherwise go ahead and unroll.
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}
-SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
+void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
// Attempt to expand using TargetLowering.
SDValue Result, Chain;
- if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) {
- if (Op.getNode()->isStrictFPOpcode())
- // Relink the chain
- DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain);
- return Result;
+ if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
+ Results.push_back(Result);
+ if (Node->isStrictFPOpcode())
+ Results.push_back(Chain);
+ return;
}
// Otherwise go ahead and unroll.
- return DAG.UnrollVectorOp(Op.getNode());
+ if (Node->isStrictFPOpcode()) {
+ UnrollStrictFPOp(Node, Results);
+ return;
+ }
+
+ Results.push_back(DAG.UnrollVectorOp(Node));
}
-SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
- EVT VT = Op.getOperand(0).getValueType();
- SDLoc DL(Op);
+void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ bool IsStrict = Node->isStrictFPOpcode();
+ unsigned OpNo = IsStrict ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
+ EVT VT = Src.getValueType();
+ SDLoc DL(Node);
// Attempt to expand using TargetLowering.
SDValue Result;
- if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG))
- return Result;
+ SDValue Chain;
+ if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
+ Results.push_back(Result);
+ if (IsStrict)
+ Results.push_back(Chain);
+ return;
+ }
// Make sure that the SINT_TO_FP and SRL instructions are available.
- if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
- TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
- return DAG.UnrollVectorOp(Op.getNode());
+ if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
+ TargetLowering::Expand) ||
+ (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
+ TargetLowering::Expand)) ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) {
+ if (IsStrict) {
+ UnrollStrictFPOp(Node, Results);
+ return;
+ }
+
+ Results.push_back(DAG.UnrollVectorOp(Node));
+ return;
+ }
unsigned BW = VT.getScalarSizeInBits();
assert((BW == 64 || BW == 32) &&
@@ -1227,153 +1360,141 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
// Two to the power of half-word-size.
- SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
+ SDValue TWOHW =
+ DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0));
// Clear upper part of LO, lower HI
- SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
- SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
+ SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);
+
+ if (IsStrict) {
+ // Convert hi and lo to floats
+ // Convert the hi part back to the upper values
+ // TODO: Can any fast-math-flags be set on these nodes?
+ SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
+ {Node->getValueType(0), MVT::Other},
+ {Node->getOperand(0), HI});
+ fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other},
+ {fHI.getValue(1), fHI, TWOHW});
+ SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
+ {Node->getValueType(0), MVT::Other},
+ {Node->getOperand(0), LO});
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
+ fLO.getValue(1));
+
+ // Add the two halves
+ SDValue Result =
+ DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
+ {TF, fHI, fLO});
+
+ Results.push_back(Result);
+ Results.push_back(Result.getValue(1));
+ return;
+ }
// Convert hi and lo to floats
// Convert the hi part back to the upper values
// TODO: Can any fast-math-flags be set on these nodes?
- SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
- fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
- SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
+ SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW);
+ SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO);
// Add the two halves
- return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
+ Results.push_back(
+ DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO));
}
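
The expansion works because x = HI * 2^(BW/2) + LO with both halves non-negative, so each half converts exactly through the signed SINT_TO_FP path. A plain scalar model of the same sequence, assuming BW == 64 and an f64 result (illustration, not the patch's code):

  #include <cstdint>
  // Scalar model of the UINT_TO_FP expansion above (BW == 64).
  double UIntToFP(uint64_t X) {
    uint64_t HI = X >> 32;          // upper half, < 2^32, non-negative
    uint64_t LO = X & 0xffffffffu;  // lower half, non-negative
    // Converting each half and scaling HI by 2^32 is exact; only the
    // final addition can round, matching the FADD in the expansion.
    return (double)(int64_t)HI * 4294967296.0 + (double)(int64_t)LO;
  }
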
-SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
- if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
- SDLoc DL(Op);
- SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
+SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
+ if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) {
+ SDLoc DL(Node);
+ SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0));
// TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
- return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
- Zero, Op.getOperand(0));
+ return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero,
+ Node->getOperand(0));
}
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(Node);
}
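
The -0.0 constant is what makes the FSUB form a faithful negation: with +0.0 the lowering would map +0.0 to +0.0 instead of -0.0. Scalar illustration:

  // fneg(x) == -0.0 - x preserves signed zero: -0.0 - (+0.0) == -0.0,
  // whereas 0.0 - (+0.0) yields +0.0 under round-to-nearest.
  double NegViaSub(double X) { return -0.0 - X; }
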
-SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
+void VectorLegalizer::ExpandFSUB(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
// For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
// we can defer this to operation legalization where it will be lowered as
// a+(-b).
- EVT VT = Op.getValueType();
+ EVT VT = Node->getValueType(0);
if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
TLI.isOperationLegalOrCustom(ISD::FADD, VT))
- return Op; // Defer to LegalizeDAG
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
+ return; // Defer to LegalizeDAG
-SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) {
- SDValue Result;
- if (TLI.expandCTPOP(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
-SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
- SDValue Result;
- if (TLI.expandCTLZ(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
+ SDValue Tmp = DAG.UnrollVectorOp(Node);
+ Results.push_back(Tmp);
}
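
The deferral rests on the IEEE identity a - b == a + (-b): negation is exact, so the rewrite introduces no extra rounding. Scalar form of what LegalizeDAG will emit once FNEG and FADD are legal:

  double SubViaAdd(double A, double B) { return A + (-B); }
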
-SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) {
- SDValue Result;
- if (TLI.expandCTTZ(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
-SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) {
- SDValue Result;
- if (TLI.expandFunnelShift(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
-SDValue VectorLegalizer::ExpandROT(SDValue Op) {
- SDValue Result;
- if (TLI.expandROT(Op.getNode(), Result, DAG))
- return Result;
-
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
-SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) {
- if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG))
- return Expanded;
- return DAG.UnrollVectorOp(Op.getNode());
-}
-
-SDValue VectorLegalizer::ExpandUADDSUBO(SDValue Op) {
+void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
SDValue Result, Overflow;
- TLI.expandUADDSUBO(Op.getNode(), Result, Overflow, DAG);
-
- if (Op.getResNo() == 0) {
- AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
- return Result;
- } else {
- AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
- return Overflow;
- }
+ TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
}
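
TLI.expandUADDSUBO conceptually lowers UADDO to an add plus an unsigned wrap check, roughly the following DAG shape (a sketch with assumed DL, VT and SetCCVT locals, not the exact TargetLowering code):

  SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
  // Result = LHS + RHS; Overflow = (Result u< LHS), i.e. the add wrapped.
  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, LHS, RHS);
  SDValue Ovf = DAG.getSetCC(DL, SetCCVT, Sum, LHS, ISD::SETULT);
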
-SDValue VectorLegalizer::ExpandSADDSUBO(SDValue Op) {
+void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
SDValue Result, Overflow;
- TLI.expandSADDSUBO(Op.getNode(), Result, Overflow, DAG);
-
- if (Op.getResNo() == 0) {
- AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
- return Result;
- } else {
- AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
- return Overflow;
- }
+ TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
}
-SDValue VectorLegalizer::ExpandMULO(SDValue Op) {
+void VectorLegalizer::ExpandMULO(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
SDValue Result, Overflow;
- if (!TLI.expandMULO(Op.getNode(), Result, Overflow, DAG))
- std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Op.getNode());
+ if (!TLI.expandMULO(Node, Result, Overflow, DAG))
+ std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);
- if (Op.getResNo() == 0) {
- AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
- return Result;
- } else {
- AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
- return Overflow;
- }
+ Results.push_back(Result);
+ Results.push_back(Overflow);
}
-SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) {
- if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG))
+SDValue VectorLegalizer::ExpandFixedPointDiv(SDNode *Node) {
+ SDNode *N = Node;
+ if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
+ N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
return Expanded;
- return DAG.UnrollVectorOp(Op.getNode());
+ return DAG.UnrollVectorOp(N);
}
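
SDIVFIX/UDIVFIX divide two fixed-point values whose scale is operand 2: with N fractional bits the quotient is (A << N) / B, computed in a wider type so the pre-scaling cannot overflow. A scalar model for 32-bit signed operands (an illustrative assumption, not the TargetLowering code):

  #include <cstdint>
  // Q(31-N).N fixed-point division: pre-scale, then truncating divide.
  int32_t SDivFix(int32_t A, int32_t B, unsigned N) {
    int64_t Wide = (int64_t)A * ((int64_t)1 << N); // widen before scaling
    return (int32_t)(Wide / B);
  }
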
-SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) {
- if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG))
- return Expanded;
- return DAG.UnrollVectorOp(Op.getNode());
+void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
+ ExpandUINT_TO_FLOAT(Node, Results);
+ return;
+ }
+ if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
+ ExpandFP_TO_UINT(Node, Results);
+ return;
+ }
+
+ UnrollStrictFPOp(Node, Results);
}
-SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
- EVT VT = Op.getValueType();
+void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ EVT VT = Node->getValueType(0);
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
- unsigned NumOpers = Op.getNumOperands();
+ unsigned NumOpers = Node->getNumOperands();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT ValueVTs[] = {EltVT, MVT::Other};
- SDValue Chain = Op.getOperand(0);
- SDLoc dl(Op);
+
+ EVT TmpEltVT = EltVT;
+ if (Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS)
+ TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TmpEltVT);
+
+ EVT ValueVTs[] = {TmpEltVT, MVT::Other};
+ SDValue Chain = Node->getOperand(0);
+ SDLoc dl(Node);
SmallVector<SDValue, 32> OpValues;
SmallVector<SDValue, 32> OpChains;
@@ -1387,7 +1508,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
// Now process the remaining operands.
for (unsigned j = 1; j < NumOpers; ++j) {
- SDValue Oper = Op.getOperand(j);
+ SDValue Oper = Node->getOperand(j);
EVT OperVT = Oper.getValueType();
if (OperVT.isVector())
@@ -1397,28 +1518,37 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
Opers.push_back(Oper);
}
- SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
+ SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
+ SDValue ScalarResult = ScalarOp.getValue(0);
+ SDValue ScalarChain = ScalarOp.getValue(1);
+
+ if (Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS)
+ ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
+ DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
+ dl, EltVT),
+ DAG.getConstant(0, dl, EltVT));
- OpValues.push_back(ScalarOp.getValue(0));
- OpChains.push_back(ScalarOp.getValue(1));
+ OpValues.push_back(ScalarResult);
+ OpChains.push_back(ScalarChain);
}
SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
- AddLegalizedOperand(Op.getValue(0), Result);
- AddLegalizedOperand(Op.getValue(1), NewChain);
-
- return Op.getResNo() ? NewChain : Result;
+ Results.push_back(Result);
+ Results.push_back(NewChain);
}
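
Each unrolled scalar op yields a {value, chain} pair and the chains are re-merged with a TokenFactor, so the ordering constraints of the strict operations survive unrolling. For STRICT_FSETCC/STRICT_FSETCCS the extra select maps the scalar i1 compare onto the 0/all-ones vector-mask encoding:

  // Scalar model of the select above: i1 -> 0 / -1 mask lane.
  int WidenBool(bool Cmp) { return Cmp ? -1 : 0; }
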
-SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
- EVT VT = Op.getValueType();
+SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
- SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue CC = Node->getOperand(2);
EVT TmpEltVT = LHS.getValueType().getVectorElementType();
- SDLoc dl(Op);
+ SDLoc dl(Node);
SmallVector<SDValue, 8> Ops(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
SDValue LHSElem = DAG.getNode(
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 3763e886cef2..974914d00d05 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TypeSize.h"
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
@@ -50,7 +51,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
- case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
@@ -146,35 +146,13 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMA:
R = ScalarizeVecRes_TernaryOp(N);
break;
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- case ISD::STRICT_FP_EXTEND:
+
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
R = ScalarizeVecRes_StrictFPOp(N);
break;
+
case ISD::UADDO:
case ISD::SADDO:
case ISD::USUBO:
@@ -187,7 +165,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
- R = ScalarizeVecRes_MULFIX(N);
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX:
+ R = ScalarizeVecRes_FIX(N);
break;
}
@@ -211,7 +191,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
Op0.getValueType(), Op0, Op1, Op2);
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_MULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = N->getOperand(2);
@@ -226,10 +206,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
EVT ValueVTs[] = {VT, MVT::Other};
SDLoc dl(N);
- SmallVector<SDValue, 4> Opers;
+ SmallVector<SDValue, 4> Opers(NumOpers);
// The Chain is the first operand.
- Opers.push_back(Chain);
+ Opers[0] = Chain;
// Now process the remaining operands.
for (unsigned i = 1; i < NumOpers; ++i) {
@@ -238,7 +218,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
if (Oper.getValueType().isVector())
Oper = GetScalarizedVector(Oper);
- Opers.push_back(Oper);
+ Opers[i] = Oper;
}
SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers);
@@ -326,18 +306,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
NewVT, Op, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) {
- EVT NewVT = N->getValueType(0).getVectorElementType();
- SDValue Op = GetScalarizedVector(N->getOperand(1));
- SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
- { NewVT, MVT::Other },
- { N->getOperand(0), Op, N->getOperand(2) });
- // Legalize the chain result - switch anything that used the old chain to
- // use the new one.
- ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- return Res;
-}
-
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FPOWI, SDLoc(N),
@@ -606,6 +574,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::UINT_TO_FP:
Res = ScalarizeVecOp_UnaryOp(N);
break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
@@ -699,7 +669,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) {
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
// Revectorize the result so the types line up with what the uses of this
// expression expect.
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+
+ // Do our own replacement and return SDValue() to tell the caller that we
+ // handled all replacements since the caller can only handle a single result.
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
}
/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
@@ -804,7 +779,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+
+ // Do our own replacement and return SDValue() to tell the caller that we
+ // handled all replacements since the caller can only handle a single result.
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return SDValue();
}
SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
@@ -901,13 +882,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FP_EXTEND:
- case ISD::STRICT_FP_EXTEND:
case ISD::FP_ROUND:
- case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
- case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
- case ISD::STRICT_FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
@@ -964,32 +941,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMA:
SplitVecRes_TernaryOp(N, Lo, Hi);
break;
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
+
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
SplitVecRes_StrictFPOp(N, Lo, Hi);
break;
+
case ISD::UADDO:
case ISD::SADDO:
case ISD::USUBO:
@@ -1002,7 +960,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
- SplitVecRes_MULFIX(N, Lo, Hi);
+ case ISD::SDIVFIX:
+ case ISD::UDIVFIX:
+ SplitVecRes_FIX(N, Lo, Hi);
break;
}
@@ -1041,7 +1001,7 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
Op0Hi, Op1Hi, Op2Hi);
}
-void DAGTypeLegalizer::SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
@@ -1206,9 +1166,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
- StackPtr =
- DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(IncrementSize, dl, StackPtr.getValueType()));
+ StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl);
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
@@ -1304,12 +1262,12 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
- SmallVector<SDValue, 4> OpsLo;
- SmallVector<SDValue, 4> OpsHi;
+ SmallVector<SDValue, 4> OpsLo(NumOps);
+ SmallVector<SDValue, 4> OpsHi(NumOps);
// The Chain is the first operand.
- OpsLo.push_back(Chain);
- OpsHi.push_back(Chain);
+ OpsLo[0] = Chain;
+ OpsHi[0] = Chain;
// Now process the remaining operands.
for (unsigned i = 1; i < NumOps; ++i) {
@@ -1327,8 +1285,8 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i);
}
- OpsLo.push_back(OpLo);
- OpsHi.push_back(OpHi);
+ OpsLo[i] = OpLo;
+ OpsHi[i] = OpHi;
}
EVT LoValueVTs[] = {LoVT, MVT::Other};
@@ -1572,12 +1530,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SDValue &Lo, SDValue &Hi) {
+ assert(MLD->isUnindexed() && "Indexed masked load during type legalization!");
EVT LoVT, HiVT;
SDLoc dl(MLD);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
SDValue Ch = MLD->getChain();
SDValue Ptr = MLD->getBasePtr();
+ SDValue Offset = MLD->getOffset();
+ assert(Offset.isUndef() && "Unexpected indexed masked load offset");
SDValue Mask = MLD->getMask();
SDValue PassThru = MLD->getPassThru();
unsigned Alignment = MLD->getOriginalAlignment();
@@ -1609,8 +1570,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());
- Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO,
- ExtType, MLD->isExpandingLoad());
+ Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
+ MMO, MLD->getAddressingMode(), ExtType,
+ MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
MLD->isExpandingLoad());
@@ -1621,8 +1583,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(),
MLD->getRanges());
- Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO,
- ExtType, MLD->isExpandingLoad());
+ Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT,
+ MMO, MLD->getAddressingMode(), ExtType,
+ MLD->isExpandingLoad());
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1747,24 +1710,6 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
if (N->getOpcode() == ISD::FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
- } else if (N->getOpcode() == ISD::STRICT_FP_ROUND) {
- Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
- { N->getOperand(0), Lo, N->getOperand(2) });
- Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
- { N->getOperand(0), Hi, N->getOperand(2) });
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Lo.getValue(1), Hi.getValue(1));
- ReplaceValueWith(SDValue(N, 1), NewChain);
- } else if (N->isStrictFPOpcode()) {
- Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
- { N->getOperand(0), Lo });
- Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
- { N->getOperand(0), Hi });
- // Legalize the chain result - switch anything that used the old chain to
- // use the new one.
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Lo.getValue(1), Hi.getValue(1));
- ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
@@ -2003,9 +1948,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VSELECT:
Res = SplitVecOp_VSELECT(N, OpNo);
break;
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
- if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType()))
+ if (N->getValueType(0).bitsLT(
+ N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
Res = SplitVecOp_TruncateHelper(N);
else
Res = SplitVecOp_UnaryOp(N);
@@ -2357,8 +2305,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
+ assert(N->isUnindexed() && "Indexed masked store of vector?");
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
+ SDValue Offset = N->getOffset();
+ assert(Offset.isUndef() && "Unexpected indexed masked store offset");
SDValue Mask = N->getMask();
SDValue Data = N->getValue();
EVT MemoryVT = N->getMemoryVT();
@@ -2392,8 +2343,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
- N->isTruncatingStore(),
+ Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
+ N->getAddressingMode(), N->isTruncatingStore(),
N->isCompressingStore());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
@@ -2405,8 +2356,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
HiMemVT.getStoreSize(), Alignment, N->getAAInfo(),
N->getRanges());
- Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
- N->isTruncatingStore(), N->isCompressingStore());
+ Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
+ N->getAddressingMode(), N->isTruncatingStore(),
+ N->isCompressingStore());
// Build a factor node to remember that this store is independent of the
// other one.
@@ -2562,7 +2514,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
//
// Without this transform, the original truncate would end up being
// scalarized, which is pretty much always a last resort.
- SDValue InVec = N->getOperand(0);
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ SDValue InVec = N->getOperand(OpNo);
EVT InVT = InVec->getValueType(0);
EVT OutVT = N->getValueType(0);
unsigned NumElements = OutVT.getVectorNumElements();
@@ -2606,8 +2559,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
NumElements/2);
- SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
- SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
+
+ SDValue HalfLo;
+ SDValue HalfHi;
+ SDValue Chain;
+ if (N->isStrictFPOpcode()) {
+ HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
+ {N->getOperand(0), InLoVec});
+ HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
+ {N->getOperand(0), InHiVec});
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1),
+ HalfHi.getValue(1));
+ } else {
+ HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
+ HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
+ }
// Concatenate them to get the full intermediate truncation result.
EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
@@ -2616,6 +2584,17 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
// type. This should normally be something that ends up being legal directly,
// but in theory if a target has very wide vectors and an annoyingly
// restricted set of legal types, this split can chain to build things up.
+
+ if (N->isStrictFPOpcode()) {
+ SDValue Res = DAG.getNode(
+ ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other},
+ {Chain, InterVec,
+ DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))});
+ // Relink the chain
+ ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
+ return Res;
+ }
+
return IsFloat
? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
DAG.getTargetConstant(
@@ -2774,30 +2753,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryWithExtraScalarOp(N);
break;
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN:
+#include "llvm/IR/ConstrainedOps.def"
Res = WidenVecRes_StrictFP(N);
break;
@@ -2843,13 +2801,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
- case ISD::STRICT_FP_EXTEND:
- case ISD::STRICT_FP_ROUND:
- case ISD::STRICT_FP_TO_SINT:
- case ISD::STRICT_FP_TO_UINT:
- Res = WidenVecRes_Convert_StrictFP(N);
- break;
-
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -3091,6 +3042,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ return WidenVecRes_STRICT_FSETCC(N);
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ return WidenVecRes_Convert_StrictFP(N);
+ default:
+ break;
+ }
+
// StrictFP op widening for operations that can trap.
unsigned NumOpers = N->getNumOperands();
unsigned Opcode = N->getOpcode();
@@ -3497,7 +3463,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
switch (getTypeAction(InVT)) {
case TargetLowering::TypeLegal:
break;
- case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypePromoteInteger: {
// If the incoming type is a vector that is being promoted, then
// we know that the elements are arranged differently and that we
// must perform the conversion using a stack slot.
@@ -3506,11 +3472,24 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
// If the InOp is promoted to the same size, convert it. Otherwise,
// fall out of the switch and widen the promoted input.
- InOp = GetPromotedInteger(InOp);
- InVT = InOp.getValueType();
- if (WidenVT.bitsEq(InVT))
- return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ SDValue NInOp = GetPromotedInteger(InOp);
+ EVT NInVT = NInOp.getValueType();
+ if (WidenVT.bitsEq(NInVT)) {
+ // For big endian targets we need to shift the input integer or the
+ // interesting bits will end up at the wrong place.
+ if (DAG.getDataLayout().isBigEndian()) {
+ unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits();
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NInVT, DAG.getDataLayout());
+ assert(ShiftAmt < WidenVT.getSizeInBits() && "Too large shift amount!");
+ NInOp = DAG.getNode(ISD::SHL, dl, NInVT, NInOp,
+ DAG.getConstant(ShiftAmt, dl, ShiftAmtTy));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NInOp);
+ }
+ InOp = NInOp;
+ InVT = NInVT;
break;
+ }
case TargetLowering::TypeSoftenFloat:
case TargetLowering::TypePromoteFloat:
case TargetLowering::TypeExpandInteger:
@@ -3748,10 +3727,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
WidenVT.getVectorNumElements());
Mask = ModifyToType(Mask, WideMaskVT, true);
- SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
- Mask, PassThru, N->getMemoryVT(),
- N->getMemOperand(), ExtType,
- N->isExpandingLoad());
+ SDValue Res = DAG.getMaskedLoad(
+ WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
+ PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
+ ExtType, N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -3798,6 +3777,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
WidenVT, N->getOperand(0));
}
+// Return true if this is a SETCC node or a strict version of it.
+static inline bool isSETCCOp(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ return true;
+ }
+ return false;
+}
+
// Return true if this is a node that could have two SETCCs as operands.
static inline bool isLogicalMaskOp(unsigned Opcode) {
switch (Opcode) {
@@ -3809,6 +3799,13 @@ static inline bool isLogicalMaskOp(unsigned Opcode) {
return false;
}
+// If N is a SETCC or a strict variant of it, return the type
+// of the compare operands.
+static inline EVT getSETCCOperandType(SDValue N) {
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ return N->getOperand(OpNo).getValueType();
+}
+
// This is used just for the assert in convertMask(). Check that this is either
// a SETCC or a SETCC previously handled by convertMask().
#ifndef NDEBUG
@@ -3831,7 +3828,7 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) {
return isSETCCorConvertedSETCC(N.getOperand(0)) &&
isSETCCorConvertedSETCC(N.getOperand(1));
- return (N.getOpcode() == ISD::SETCC ||
+ return (isSETCCOp(N.getOpcode()) ||
ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
}
#endif
@@ -3846,10 +3843,17 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
// Make a new Mask node, with a legal result VT.
+ SDValue Mask;
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
Ops.push_back(InMask->getOperand(i));
- SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
+ if (InMask->isStrictFPOpcode()) {
+ Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask),
+ { MaskVT, MVT::Other }, Ops);
+ ReplaceValueWith(InMask.getValue(1), Mask.getValue(1));
+ } else
+ Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
// If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
// extend or truncate is needed.
@@ -3902,7 +3906,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
if (N->getOpcode() != ISD::VSELECT)
return SDValue();
- if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode()))
+ if (!isSETCCOp(Cond->getOpcode()) && !isLogicalMaskOp(Cond->getOpcode()))
return SDValue();
// If this is a split VSELECT that was already handled, do
@@ -3925,8 +3929,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
return SDValue();
// If there is support for an i1 vector mask, don't touch.
- if (Cond.getOpcode() == ISD::SETCC) {
- EVT SetCCOpVT = Cond->getOperand(0).getValueType();
+ if (isSETCCOp(Cond.getOpcode())) {
+ EVT SetCCOpVT = getSETCCOperandType(Cond);
while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
@@ -3957,17 +3961,17 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger();
SDValue Mask;
- if (Cond->getOpcode() == ISD::SETCC) {
- EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType());
+ if (isSETCCOp(Cond->getOpcode())) {
+ EVT MaskVT = getSetCCResultType(getSETCCOperandType(Cond));
Mask = convertMask(Cond, MaskVT, ToMaskVT);
} else if (isLogicalMaskOp(Cond->getOpcode()) &&
- Cond->getOperand(0).getOpcode() == ISD::SETCC &&
- Cond->getOperand(1).getOpcode() == ISD::SETCC) {
+ isSETCCOp(Cond->getOperand(0).getOpcode()) &&
+ isSETCCOp(Cond->getOperand(1).getOpcode())) {
// Cond is (AND/OR/XOR (SETCC, SETCC))
SDValue SETCC0 = Cond->getOperand(0);
SDValue SETCC1 = Cond->getOperand(1);
- EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType());
- EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType());
+ EVT VT0 = getSetCCResultType(getSETCCOperandType(SETCC0));
+ EVT VT1 = getSetCCResultType(getSETCCOperandType(SETCC1));
unsigned ScalarBits0 = VT0.getScalarSizeInBits();
unsigned ScalarBits1 = VT1.getScalarSizeInBits();
unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
@@ -4119,6 +4123,47 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
WidenVT, InOp1, InOp2, N->getOperand(2));
}
+SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
+ assert(N->getValueType(0).isVector() &&
+ N->getOperand(1).getValueType().isVector() &&
+ "Operands must be vectors");
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ SDValue CC = N->getOperand(3);
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+
+ // Fully unroll and reassemble.
+ SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 8> Chains(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue LHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue RHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
+ {Chain, LHSElem, RHSElem, CC});
+ Chains[i] = Scalars[i].getValue(1);
+ Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i],
+ DAG.getBoolConstant(true, dl, EltVT, VT),
+ DAG.getBoolConstant(false, dl, EltVT, VT));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(WidenVT, dl, Scalars);
+}
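
Only the original NumElts lanes are compared; the remaining WidenNumElts - NumElts lanes stay UNDEF. DAG.getBoolConstant picks the target's truth encoding, which behaves like this scalar model (assuming the usual 0/-1 vector-mask convention):

  // What getBoolConstant(true/false, ...) evaluates to per lane.
  int BoolLane(bool B, bool ZeroOrNegOne) {
    return B ? (ZeroOrNegOne ? -1 : 1) : 0;
  }
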
//===----------------------------------------------------------------------===//
// Widen Vector Operand
@@ -4150,6 +4195,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
@@ -4161,12 +4208,16 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::SINT_TO_FP:
+ case ISD::STRICT_SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
case ISD::TRUNCATE:
Res = WidenVecOp_Convert(N);
break;
@@ -4297,13 +4348,21 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
SDValue Res;
if (N->isStrictFPOpcode()) {
- Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
- { N->getOperand(0), InOp });
+ if (Opcode == ISD::STRICT_FP_ROUND)
+ Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
+ { N->getOperand(0), InOp, N->getOperand(2) });
+ else
+ Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
+ { N->getOperand(0), InOp });
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
- } else
- Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+ } else {
+ if (Opcode == ISD::FP_ROUND)
+ Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1));
+ else
+ Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+ }
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
@@ -4486,7 +4545,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
StVal.getValueType().getVectorNumElements() &&
"Mask and data vectors should have the same number of elements");
return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
- Mask, MST->getMemoryVT(), MST->getMemOperand(),
+ MST->getOffset(), Mask, MST->getMemoryVT(),
+ MST->getMemOperand(), MST->getAddressingMode(),
false, MST->isCompressingStore());
}
@@ -4580,6 +4640,44 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
return DAG.getNode(ExtendCode, dl, VT, CC);
}
+SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue LHS = GetWidenedVector(N->getOperand(1));
+ SDValue RHS = GetWidenedVector(N->getOperand(2));
+ SDValue CC = N->getOperand(3);
+ SDLoc dl(N);
+
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Unroll into a build vector.
+ SmallVector<SDValue, 8> Scalars(NumElts);
+ SmallVector<SDValue, 8> Chains(NumElts);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue LHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ SDValue RHSElem = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
+ {Chain, LHSElem, RHSElem, CC});
+ Chains[i] = Scalars[i].getValue(1);
+ Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i],
+ DAG.getBoolConstant(true, dl, EltVT, VT),
+ DAG.getBoolConstant(false, dl, EltVT, VT));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(VT, dl, Scalars);
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
SDLoc dl(N);
SDValue Op = GetWidenedVector(N->getOperand(0));
@@ -4670,7 +4768,8 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
unsigned Width, EVT WidenVT,
unsigned Align = 0, unsigned WidenEx = 0) {
EVT WidenEltVT = WidenVT.getVectorElementType();
- unsigned WidenWidth = WidenVT.getSizeInBits();
+ const bool Scalable = WidenVT.isScalableVector();
+ unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize();
unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
unsigned AlignInBits = Align*8;
@@ -4681,23 +4780,27 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
// See if there is larger legal integer than the element type to load/store.
unsigned VT;
- for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
- VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
- EVT MemVT((MVT::SimpleValueType) VT);
- unsigned MemVTWidth = MemVT.getSizeInBits();
- if (MemVT.getSizeInBits() <= WidenEltWidth)
- break;
- auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
- if ((Action == TargetLowering::TypeLegal ||
- Action == TargetLowering::TypePromoteInteger) &&
- (WidenWidth % MemVTWidth) == 0 &&
- isPowerOf2_32(WidenWidth / MemVTWidth) &&
- (MemVTWidth <= Width ||
- (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
- if (MemVTWidth == WidenWidth)
- return MemVT;
- RetVT = MemVT;
- break;
+ // Don't bother looking for an integer type if the vector is scalable; skip
+ // straight to vector types.
+ if (!Scalable) {
+ for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+ EVT MemVT((MVT::SimpleValueType) VT);
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+ if ((Action == TargetLowering::TypeLegal ||
+ Action == TargetLowering::TypePromoteInteger) &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (MemVTWidth == WidenWidth)
+ return MemVT;
+ RetVT = MemVT;
+ break;
+ }
}
}
@@ -4706,7 +4809,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
- unsigned MemVTWidth = MemVT.getSizeInBits();
+ // Skip vector MVTs which don't match the scalable property of WidenVT.
+ if (Scalable != MemVT.isScalableVector())
+ continue;
+ unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinSize();
auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
if ((Action == TargetLowering::TypeLegal ||
Action == TargetLowering::TypePromoteInteger) &&
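
FindMemType walks MVTs from widest to narrowest looking for a legal type that tiles the widened vector; for scalable vectors the integer loop is skipped because no fixed-width integer can tile a vscale-sized register. Ignoring the alignment escape hatch, the candidate test reduces to roughly:

  // Simplified shape of FindMemType's per-candidate check
  // (isPowerOf2_32 is from llvm/Support/MathExtras.h).
  bool IsViableMemType(unsigned WidenWidth, unsigned MemVTWidth,
                       unsigned Width) {
    return (WidenWidth % MemVTWidth) == 0 &&         // tiles WidenVT exactly
           isPowerOf2_32(WidenWidth / MemVTWidth) && // power-of-two pieces
           MemVTWidth <= Width;                      // fits what remains
  }
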
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index d4c1fb36475e..0e4d783e3505 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -910,10 +910,9 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
- if (MDNode *MD = DAG->getHeapAllocSite(N)) {
+ if (MDNode *MD = DAG->getHeapAllocSite(N))
if (NewInsn && NewInsn->isCall())
- MF.addCodeViewHeapAllocSite(NewInsn, MD);
- }
+ NewInsn->setHeapAllocMarker(MF, MD);
GluedNodes.pop_back();
}
@@ -923,9 +922,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (HasDbg)
ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
NewInsn);
+
if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
if (NewInsn && NewInsn->isCall())
- MF.addCodeViewHeapAllocSite(NewInsn, MD);
+ NewInsn->setHeapAllocMarker(MF, MD);
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 52a71b91d93f..313e07b5fdd6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -24,6 +24,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -63,6 +66,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -352,9 +356,9 @@ ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
(OldG << 2)); // New L bit.
}
-ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
+static ISD::CondCode getSetCCInverseImpl(ISD::CondCode Op, bool isIntegerLike) {
unsigned Operation = Op;
- if (isInteger)
+ if (isIntegerLike)
Operation ^= 7; // Flip L, G, E bits, but not U.
else
Operation ^= 15; // Flip all of the condition bits.
@@ -365,6 +369,15 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
return ISD::CondCode(Operation);
}
+ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, EVT Type) {
+ return getSetCCInverseImpl(Op, Type.isInteger());
+}
+
+ISD::CondCode ISD::GlobalISel::getSetCCInverse(ISD::CondCode Op,
+ bool isIntegerLike) {
+ return getSetCCInverseImpl(Op, isIntegerLike);
+}
+
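
Both wrappers share the CondCode bit encoding: bit 0 = E, bit 1 = G, bit 2 = L, bit 3 = U. An integer inverse flips only L/G/E (^7); an FP inverse also flips the unordered bit (^15). For example, with the new EVT-based API:

  ISD::CondCode IntInv = ISD::getSetCCInverse(ISD::SETLT, MVT::i32);  // SETGE
  ISD::CondCode FPInv = ISD::getSetCCInverse(ISD::SETULT, MVT::f32);  // SETOGE
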
/// For an integer comparison, return 1 if the comparison is a signed operation
/// and 2 if the result is an unsigned comparison. Return zero if the operation
/// does not depend on the sign of the input (setne and seteq).
@@ -385,7 +398,8 @@ static int isSignedOp(ISD::CondCode Opcode) {
}
ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
- bool IsInteger) {
+ EVT Type) {
+ bool IsInteger = Type.isInteger();
if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed integer setcc with an unsigned integer setcc.
return ISD::SETCC_INVALID;
@@ -405,7 +419,8 @@ ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
}
ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
- bool IsInteger) {
+ EVT Type) {
+ bool IsInteger = Type.isInteger();
if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed setcc with an unsigned setcc.
return ISD::SETCC_INVALID;
@@ -1005,7 +1020,9 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE,
Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
- LegacyDivergenceAnalysis * Divergence) {
+ LegacyDivergenceAnalysis * Divergence,
+ ProfileSummaryInfo *PSIin,
+ BlockFrequencyInfo *BFIin) {
MF = &NewMF;
SDAGISelPass = PassPtr;
ORE = &NewORE;
@@ -1014,6 +1031,8 @@ void SelectionDAG::init(MachineFunction &NewMF,
LibInfo = LibraryInfo;
Context = &MF->getFunction().getContext();
DA = Divergence;
+ PSI = PSIin;
+ BFI = BFIin;
}
SelectionDAG::~SelectionDAG() {
@@ -1023,6 +1042,11 @@ SelectionDAG::~SelectionDAG() {
delete DbgInfo;
}
+bool SelectionDAG::shouldOptForSize() const {
+ return MF->getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI);
+}
+
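
shouldOptForSize lets block-level PGO data (via llvm::shouldOptimizeForSize) join size-vs-speed decisions that previously keyed off the function-wide optsize attribute alone, as in the constant-pool alignment change below. Typical call-site pattern (a sketch; DL and Ty are assumed locals):

  unsigned Align = DAG.shouldOptForSize()
                       ? DL.getABITypeAlignment(Ty)   // smallest legal
                       : DL.getPrefTypeAlignment(Ty); // fastest for speed
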
void SelectionDAG::allnodes_clear() {
assert(&*AllNodes.begin() == &EntryNode);
AllNodes.remove(AllNodes.begin());
@@ -1101,6 +1125,20 @@ SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) {
: getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
}
+std::pair<SDValue, SDValue>
+SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain,
+ const SDLoc &DL, EVT VT) {
+ assert(!VT.bitsEq(Op.getValueType()) &&
+ "Strict no-op FP extend/round not allowed.");
+ SDValue Res =
+ VT.bitsGT(Op.getValueType())
+ ? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op})
+ : getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other},
+ {Chain, Op, getIntPtrConstant(0, DL)});
+
+ return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1));
+}
+
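
A caller-side sketch of the new helper (Val, InChain and DL are assumed locals, with Val narrower than f64; the assert above forbids no-op conversions):

  SDValue Res, OutChain;
  std::tie(Res, OutChain) =
      DAG.getStrictFPExtendOrRound(Val, InChain, DL, MVT::f64);
  // OutChain, not InChain, must feed any later strict-FP node.
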
SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
return VT.bitsGT(Op.getValueType()) ?
getNode(ISD::ANY_EXTEND, DL, VT, Op) :
@@ -1279,7 +1317,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
}
SDValue Result(N, 0);
- if (VT.isVector())
+ if (VT.isScalableVector())
+ Result = getSplatVector(VT, DL, Result);
+ else if (VT.isVector())
Result = getSplatBuildVector(VT, DL, Result);
return Result;
@@ -1425,7 +1465,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = MF->getFunction().hasOptSize()
+ Alignment = shouldOptForSize()
? getDataLayout().getABITypeAlignment(C->getType())
: getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
@@ -2379,9 +2419,10 @@ SDValue SelectionDAG::getSplatValue(SDValue V) {
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
/// is less than the element bit-width of the shift node, return it.
-static const APInt *getValidShiftAmountConstant(SDValue V) {
+static const APInt *getValidShiftAmountConstant(SDValue V,
+ const APInt &DemandedElts) {
unsigned BitWidth = V.getScalarValueSizeInBits();
- if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) {
// Shifting more than the bitwidth is not valid.
const APInt &ShAmt = SA->getAPIntValue();
if (ShAmt.ult(BitWidth))
@@ -2392,13 +2433,16 @@ static const APInt *getValidShiftAmountConstant(SDValue V) {
/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
/// than the element bit-width of the shift node, return the minimum value.
-static const APInt *getValidMinimumShiftAmountConstant(SDValue V) {
+static const APInt *
+getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
unsigned BitWidth = V.getScalarValueSizeInBits();
auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
if (!BV)
return nullptr;
const APInt *MinShAmt = nullptr;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
if (!SA)
return nullptr;
@@ -2413,6 +2457,32 @@ static const APInt *getValidMinimumShiftAmountConstant(SDValue V) {
return MinShAmt;
}
+/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
+/// than the element bit-width of the shift node, return the maximum value.
+static const APInt *
+getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) {
+ unsigned BitWidth = V.getScalarValueSizeInBits();
+ auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
+ if (!BV)
+ return nullptr;
+ const APInt *MaxShAmt = nullptr;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!SA)
+ return nullptr;
+ // Shifting more than the bitwidth is not valid.
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.uge(BitWidth))
+ return nullptr;
+ if (MaxShAmt && MaxShAmt->uge(ShAmt))
+ continue;
+ MaxShAmt = &ShAmt;
+ }
+ return MaxShAmt;
+}
+
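
Where the minimum-shift helper strengthens SRL/SRA known bits, the maximum-shift helper bounds SHL sign-bit loss: if every demanded lane shifts by at most M, at least Tmp - M sign bits survive. As a scalar bound:

  // Lower bound on sign bits after shl when all amounts are <= MaxShAmt.
  unsigned SignBitsAfterShl(unsigned Tmp, unsigned MaxShAmt) {
    return Tmp > MaxShAmt ? Tmp - MaxShAmt : 1; // 1 is the generic fallback
  }
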
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. For vectors, the known bits are those that are shared by
/// every vector element.
@@ -2784,37 +2854,60 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setBitsFrom(1);
break;
case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
// If we know the result of a setcc has the top bits zero, use this info.
- if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
Known.Zero.setBitsFrom(1);
break;
+ }
case ISD::SHL:
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned Shift = ShAmt->getZExtValue();
Known.Zero <<= Shift;
Known.One <<= Shift;
// Low bits are known zero.
Known.Zero.setLowBits(Shift);
+ break;
}
+
+ // No matter the shift amount, the trailing zeros will stay zero.
+ Known.Zero = APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros());
+ Known.One.clearAllBits();
+
+ // Minimum shift low bits are known zero.
+ if (const APInt *ShMinAmt =
+ getValidMinimumShiftAmountConstant(Op, DemandedElts))
+ Known.Zero.setLowBits(ShMinAmt->getZExtValue());
break;
case ISD::SRL:
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
- Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
unsigned Shift = ShAmt->getZExtValue();
Known.Zero.lshrInPlace(Shift);
Known.One.lshrInPlace(Shift);
// High bits are known zero.
Known.Zero.setHighBits(Shift);
- } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) {
- // Minimum shift high bits are known zero.
- Known.Zero.setHighBits(ShMinAmt->getZExtValue());
+ break;
}
+
+ // No matter the shift amount, the leading zeros will stay zero.
+ Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros());
+ Known.One.clearAllBits();
+
+ // Minimum shift high bits are known zero.
+ if (const APInt *ShMinAmt =
+ getValidMinimumShiftAmountConstant(Op, DemandedElts))
+ Known.Zero.setHighBits(ShMinAmt->getZExtValue());
break;
case ISD::SRA:
- if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
unsigned Shift = ShAmt->getZExtValue();
// Sign extend known zero/one bit (else is unknown).
@@ -3336,20 +3429,20 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
KnownBits N0Known = computeKnownBits(N0);
bool overflow;
- (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow);
+ (void)N0Known.getMaxValue().uadd_ov(N1Known.getMaxValue(), overflow);
if (!overflow)
return OFK_Never;
}
// mulhi + 1 never overflow
if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
- (~N1Known.Zero & 0x01) == ~N1Known.Zero)
+ (N1Known.getMaxValue() & 0x01) == N1Known.getMaxValue())
return OFK_Never;
if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
KnownBits N0Known = computeKnownBits(N0);
- if ((~N0Known.Zero & 0x01) == ~N0Known.Zero)
+ if ((N0Known.getMaxValue() & 0x01) == N0Known.getMaxValue())
return OFK_Never;
}
@@ -3550,25 +3643,26 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp = VTBits - SrcVT.getScalarSizeInBits();
return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp;
}
-
case ISD::SRA:
- Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
- // SRA X, C -> adds C sign bits.
- if (ConstantSDNode *C =
- isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
- APInt ShiftVal = C->getAPIntValue();
- ShiftVal += Tmp;
- Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
- }
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // SRA X, C -> adds C sign bits.
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts))
+ Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits);
+ else if (const APInt *ShAmt =
+ getValidMinimumShiftAmountConstant(Op, DemandedElts))
+ Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits);
return Tmp;
case ISD::SHL:
- if (ConstantSDNode *C =
- isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
- // shl destroys sign bits.
- Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
- if (C->getAPIntValue().uge(VTBits) || // Bad shift.
- C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out.
- return Tmp - C->getZExtValue();
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) {
+      // shl destroys sign bits; ensure it doesn't shift out all sign bits.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (ShAmt->ult(Tmp))
+ return Tmp - ShAmt->getZExtValue();
+ } else if (const APInt *ShAmt =
+ getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (ShAmt->ult(Tmp))
+ return Tmp - ShAmt->getZExtValue();
}
break;
case ISD::AND:
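
Worked numerically, the updated shift rules above say: sra adds its (possibly only minimum-known) shift amount to the operand's sign-bit count, clamped at the bit width, while shl subtracts its (possibly only maximum-known) amount when that provably leaves at least one sign bit. A runnable sketch, assuming arithmetic right shift for signed values:

    #include <cassert>
    #include <cstdint>

    // Number of leading bits equal to the sign bit (cf. ComputeNumSignBits).
    unsigned numSignBits(int32_t V) {
      uint32_t U = (uint32_t)V, Sign = U >> 31;
      unsigned N = 1;
      for (int I = 30; I >= 0 && ((U >> I) & 1) == Sign; --I)
        ++N;
      return N;
    }

    int main() {
      int32_t X = -2; // 0b111...10
      assert(numSignBits(X) == 31);
      assert(numSignBits(X >> 5) == 32);                      // 31 + 5, capped
      assert(numSignBits((int32_t)((uint32_t)X << 4)) == 27); // 31 - 4
      return 0;
    }
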
@@ -3648,11 +3742,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return VTBits;
break;
case ISD::SETCC:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0;
// If setcc returns 0/-1, all bits are sign bits.
- if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
+ }
case ISD::ROTL:
case ISD::ROTR:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
@@ -4648,11 +4746,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
- // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
- OpOpcode == ISD::FSUB)
- return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
- Operand.getOperand(0), Flags);
if (OpOpcode == ISD::FNEG) // --X -> X
return Operand.getOperand(0);
break;
@@ -4689,46 +4782,46 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
}
-static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
- const APInt &C2) {
+static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
+ const APInt &C2) {
switch (Opcode) {
- case ISD::ADD: return std::make_pair(C1 + C2, true);
- case ISD::SUB: return std::make_pair(C1 - C2, true);
- case ISD::MUL: return std::make_pair(C1 * C2, true);
- case ISD::AND: return std::make_pair(C1 & C2, true);
- case ISD::OR: return std::make_pair(C1 | C2, true);
- case ISD::XOR: return std::make_pair(C1 ^ C2, true);
- case ISD::SHL: return std::make_pair(C1 << C2, true);
- case ISD::SRL: return std::make_pair(C1.lshr(C2), true);
- case ISD::SRA: return std::make_pair(C1.ashr(C2), true);
- case ISD::ROTL: return std::make_pair(C1.rotl(C2), true);
- case ISD::ROTR: return std::make_pair(C1.rotr(C2), true);
- case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true);
- case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true);
- case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true);
- case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true);
- case ISD::SADDSAT: return std::make_pair(C1.sadd_sat(C2), true);
- case ISD::UADDSAT: return std::make_pair(C1.uadd_sat(C2), true);
- case ISD::SSUBSAT: return std::make_pair(C1.ssub_sat(C2), true);
- case ISD::USUBSAT: return std::make_pair(C1.usub_sat(C2), true);
+ case ISD::ADD: return C1 + C2;
+ case ISD::SUB: return C1 - C2;
+ case ISD::MUL: return C1 * C2;
+ case ISD::AND: return C1 & C2;
+ case ISD::OR: return C1 | C2;
+ case ISD::XOR: return C1 ^ C2;
+ case ISD::SHL: return C1 << C2;
+ case ISD::SRL: return C1.lshr(C2);
+ case ISD::SRA: return C1.ashr(C2);
+ case ISD::ROTL: return C1.rotl(C2);
+ case ISD::ROTR: return C1.rotr(C2);
+ case ISD::SMIN: return C1.sle(C2) ? C1 : C2;
+ case ISD::SMAX: return C1.sge(C2) ? C1 : C2;
+ case ISD::UMIN: return C1.ule(C2) ? C1 : C2;
+ case ISD::UMAX: return C1.uge(C2) ? C1 : C2;
+ case ISD::SADDSAT: return C1.sadd_sat(C2);
+ case ISD::UADDSAT: return C1.uadd_sat(C2);
+ case ISD::SSUBSAT: return C1.ssub_sat(C2);
+ case ISD::USUBSAT: return C1.usub_sat(C2);
case ISD::UDIV:
if (!C2.getBoolValue())
break;
- return std::make_pair(C1.udiv(C2), true);
+ return C1.udiv(C2);
case ISD::UREM:
if (!C2.getBoolValue())
break;
- return std::make_pair(C1.urem(C2), true);
+ return C1.urem(C2);
case ISD::SDIV:
if (!C2.getBoolValue())
break;
- return std::make_pair(C1.sdiv(C2), true);
+ return C1.sdiv(C2);
case ISD::SREM:
if (!C2.getBoolValue())
break;
- return std::make_pair(C1.srem(C2), true);
+ return C1.srem(C2);
}
- return std::make_pair(APInt(1, 0), false);
+ return llvm::None;
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
@@ -4736,12 +4829,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
const ConstantSDNode *C2) {
if (C1->isOpaque() || C2->isOpaque())
return SDValue();
-
- std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(),
- C2->getAPIntValue());
- if (!Folded.second)
- return SDValue();
- return getConstant(Folded.first, DL, VT);
+ if (Optional<APInt> Folded =
+ FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()))
+ return getConstant(Folded.getValue(), DL, VT);
+ return SDValue();
}
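
With Optional<APInt>, "no fold happened" is the absence of a value instead of a (APInt(1, 0), false) sentinel pair, and the caller collapses to a single if. The same shape in standard C++, with std::optional standing in for llvm::Optional (hypothetical mini-folder, not LLVM API):

    #include <cassert>
    #include <cstdint>
    #include <optional>

    std::optional<uint32_t> foldValue(char Op, uint32_t A, uint32_t B) {
      switch (Op) {
      case '+': return A + B;
      case '/':
        if (B == 0)
          break;             // division by zero: refuse to fold
        return A / B;
      }
      return std::nullopt;   // unhandled opcode or rejected operands
    }

    int main() {
      assert(foldValue('+', 2, 3).value() == 5);
      assert(!foldValue('/', 1, 0).has_value());
      return 0;
    }
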
SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
@@ -5228,8 +5319,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
"The result of EXTRACT_VECTOR_ELT must be at least as wide as the \
element type of the vector.");
- // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
- if (N1.isUndef())
+ // Extract from an undefined value or using an undefined index is undefined.
+ if (N1.isUndef() || N2.isUndef())
return getUNDEF(VT);
// EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF
@@ -5506,6 +5597,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
return getUNDEF(VT);
+
+ // Undefined index can be assumed out-of-bounds, so that's UNDEF too.
+ if (N3.isUndef())
+ return getUNDEF(VT);
+
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (N2.isUndef())
+ return N1;
+
break;
}
case ISD::INSERT_SUBVECTOR: {
@@ -5697,10 +5797,19 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG,
return SDValue(nullptr, 0);
}
-SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset,
- const SDLoc &DL) {
+SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, int64_t Offset,
+ const SDLoc &DL,
+ const SDNodeFlags Flags) {
EVT VT = Base.getValueType();
- return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT));
+ return getMemBasePlusOffset(Base, getConstant(Offset, DL, VT), DL, Flags);
+}
+
+SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset,
+ const SDLoc &DL,
+ const SDNodeFlags Flags) {
+ assert(Offset.getValueType().isInteger());
+ EVT BasePtrVT = Ptr.getValueType();
+ return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags);
}
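
With SDNodeFlags threaded through both overloads, callers (such as the visitStore change later in this patch) can build flagged pointer arithmetic in one call. A hedged usage sketch, assuming DAG, Ptr, and DL are in scope:

    SDNodeFlags Flags;
    Flags.setNoUnsignedWrap(true);
    // Ptr + 16 as a single ISD::ADD carrying the nuw flag.
    SDValue PtrPlus16 = DAG.getMemBasePlusOffset(Ptr, 16, DL, Flags);
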
/// Returns true if memcpy source is constant data.
@@ -5722,12 +5831,13 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
SrcDelta + G->getOffset());
}
-static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF,
+ SelectionDAG &DAG) {
// On Darwin, -Os means optimize for size without hurting performance, so
// only really optimize for size when -Oz (MinSize) is used.
if (MF.getTarget().getTargetTriple().isOSDarwin())
return MF.getFunction().hasMinSize();
- return MF.getFunction().hasOptSize();
+ return DAG.shouldOptForSize();
}
static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
@@ -5777,7 +5887,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
+ bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -5960,7 +6070,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
+ bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -6066,7 +6176,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
+ bool OptSize = shouldLowerMemFuncForSize(MF, DAG);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -6557,7 +6667,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
- if (!Size)
+ if (!Size && MemVT.isScalableVector())
+ Size = MemoryLocation::UnknownSize;
+ else if (!Size)
Size = MemVT.getStoreSize();
MachineFunction &MF = getMachineFunction();
@@ -6951,16 +7063,22 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
}
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
- SDValue Ptr, SDValue Mask, SDValue PassThru,
- EVT MemVT, MachineMemOperand *MMO,
+ SDValue Base, SDValue Offset, SDValue Mask,
+ SDValue PassThru, EVT MemVT,
+ MachineMemOperand *MMO,
+ ISD::MemIndexedMode AM,
ISD::LoadExtType ExtTy, bool isExpanding) {
- SDVTList VTs = getVTList(VT, MVT::Other);
- SDValue Ops[] = { Chain, Ptr, Mask, PassThru };
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) &&
+ "Unindexed masked load with an offset!");
+ SDVTList VTs = Indexed ? getVTList(VT, Base.getValueType(), MVT::Other)
+ : getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Base, Offset, Mask, PassThru};
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
- dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));
+ dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -6968,7 +7086,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
- ExtTy, isExpanding, MemVT, MMO);
+ AM, ExtTy, isExpanding, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -6978,27 +7096,45 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
return V;
}
+SDValue SelectionDAG::getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ MaskedLoadSDNode *LD = cast<MaskedLoadSDNode>(OrigLoad);
+  assert(LD->getOffset().isUndef() && "Masked load is already an indexed load!");
+ return getMaskedLoad(OrigLoad.getValueType(), dl, LD->getChain(), Base,
+ Offset, LD->getMask(), LD->getPassThru(),
+ LD->getMemoryVT(), LD->getMemOperand(), AM,
+ LD->getExtensionType(), LD->isExpandingLoad());
+}
+
SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
- SDValue Val, SDValue Ptr, SDValue Mask,
- EVT MemVT, MachineMemOperand *MMO,
- bool IsTruncating, bool IsCompressing) {
+ SDValue Val, SDValue Base, SDValue Offset,
+ SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO,
+ ISD::MemIndexedMode AM, bool IsTruncating,
+ bool IsCompressing) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
- SDVTList VTs = getVTList(MVT::Other);
- SDValue Ops[] = { Chain, Val, Ptr, Mask };
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) &&
+ "Unindexed masked store with an offset!");
+ SDVTList VTs = Indexed ? getVTList(Base.getValueType(), MVT::Other)
+ : getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Val, Base, Offset, Mask};
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
- dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO));
+ dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
}
- auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
- IsTruncating, IsCompressing, MemVT, MMO);
+ auto *N =
+ newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ IsTruncating, IsCompressing, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -7008,6 +7144,17 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ MaskedStoreSDNode *ST = cast<MaskedStoreSDNode>(OrigStore);
+ assert(ST->getOffset().isUndef() &&
+         "Masked store is already an indexed store!");
+ return getMaskedStore(ST->getChain(), dl, ST->getValue(), Base, Offset,
+ ST->getMask(), ST->getMemoryVT(), ST->getMemOperand(),
+ AM, ST->isTruncatingStore(), ST->isCompressingStore());
+}
+
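
A hedged sketch of the new masked-load shape, with all SDValues assumed in scope: an unindexed load now carries an explicit undef offset, and the new helper rewrites it into a pre-incremented form when an address increment can be folded in.

    SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
    SDValue Load = DAG.getMaskedLoad(VT, DL, Chain, Ptr, Undef, Mask, PassThru,
                                     MemVT, MMO, ISD::UNINDEXED,
                                     ISD::NON_EXTLOAD, /*IsExpanding=*/false);
    // Later, fold an address increment into the load itself:
    SDValue PreInc =
        DAG.getIndexedMaskedLoad(Load, DL, NewBase, Inc, ISD::PRE_INC);

The masked-store path below mirrors this exactly, with getIndexedMaskedStore as the counterpart helper.
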
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
MachineMemOperand *MMO,
@@ -7263,8 +7410,40 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
if (VTList.NumVTs == 1)
return getNode(Opcode, DL, VTList.VTs[0], Ops);
-#if 0
switch (Opcode) {
+ case ISD::STRICT_FP_EXTEND:
+ assert(VTList.NumVTs == 2 && Ops.size() == 2 &&
+ "Invalid STRICT_FP_EXTEND!");
+ assert(VTList.VTs[0].isFloatingPoint() &&
+ Ops[1].getValueType().isFloatingPoint() && "Invalid FP cast!");
+ assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() &&
+ "STRICT_FP_EXTEND result type should be vector iff the operand "
+ "type is vector!");
+ assert((!VTList.VTs[0].isVector() ||
+ VTList.VTs[0].getVectorNumElements() ==
+ Ops[1].getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(Ops[1].getValueType().bitsLT(VTList.VTs[0]) &&
+ "Invalid fpext node, dst <= src!");
+ break;
+ case ISD::STRICT_FP_ROUND:
+ assert(VTList.NumVTs == 2 && Ops.size() == 3 && "Invalid STRICT_FP_ROUND!");
+ assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() &&
+ "STRICT_FP_ROUND result type should be vector iff the operand "
+ "type is vector!");
+ assert((!VTList.VTs[0].isVector() ||
+ VTList.VTs[0].getVectorNumElements() ==
+ Ops[1].getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
+ assert(VTList.VTs[0].isFloatingPoint() &&
+ Ops[1].getValueType().isFloatingPoint() &&
+ VTList.VTs[0].bitsLT(Ops[1].getValueType()) &&
+ isa<ConstantSDNode>(Ops[2]) &&
+ (cast<ConstantSDNode>(Ops[2])->getZExtValue() == 0 ||
+ cast<ConstantSDNode>(Ops[2])->getZExtValue() == 1) &&
+ "Invalid STRICT_FP_ROUND!");
+ break;
+#if 0
// FIXME: figure out how to safely handle things like
// int foo(int x) { return 1 << (x & 255); }
// int bar() { return foo(256); }
@@ -7283,8 +7462,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
}
break;
- }
#endif
+ }
// Memoize the node unless it returns a flag.
SDNode *N;
@@ -7740,38 +7919,11 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
switch (OrigOpc) {
default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
- case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
- case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
- case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
- case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
- case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
- case ISD::STRICT_FMA: NewOpc = ISD::FMA; break;
- case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; break;
- case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
- case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
- case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; break;
- case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; break;
- case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; break;
- case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; break;
- case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break;
- case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break;
- case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break;
- case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break;
- case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break;
- case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break;
- case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break;
- case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
- case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
- case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break;
- case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break;
- case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break;
- case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break;
- case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break;
- case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break;
- case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
- case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break;
- case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break;
- case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break;
+#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break;
+#include "llvm/IR/ConstrainedOps.def"
}
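
The hand-written table is now stamped out from ConstrainedOps.def using the X-macro idiom: the .def file invokes INSTRUCTION(...) once per constrained operation, and each includer defines the macro to generate whatever code it needs. A self-contained illustration of the idiom with hypothetical ops (not the real .def contents):

    #include <cstdio>

    // ops.def equivalent: one macro invocation per operation.
    #define MY_OPS(X) X(FADD) X(FSUB) X(FMUL)

    // First expansion: an enum of strict opcodes.
    #define X(NAME) STRICT_##NAME,
    enum StrictOp { MY_OPS(X) };
    #undef X

    // Second expansion: a parallel name table.
    #define X(NAME) #NAME,
    static const char *Names[] = { MY_OPS(X) };
    #undef X

    int main() {
      std::printf("%s\n", Names[STRICT_FMUL]); // prints FMUL
      return 0;
    }
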
assert(Node->getNumValues() == 2 && "Unexpected number of results!");
@@ -8051,9 +8203,9 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
Expr = *Fragment;
}
// Clone the SDDbgValue and move it to To.
- SDDbgValue *Clone =
- getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(),
- Dbg->getDebugLoc(), Dbg->getOrder());
+ SDDbgValue *Clone = getDbgValue(
+ Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), Dbg->getDebugLoc(),
+ std::max(ToNode->getIROrder(), Dbg->getOrder()));
ClonedDVs.push_back(Clone);
if (InvalidateDbg) {
@@ -8831,7 +8983,9 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
// We check here that the size of the memory operand fits within the size of
// the MMO. This is because the MMO might indicate only a possible address
// range instead of specifying the affected memory addresses precisely.
- assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!");
+ // TODO: Make MachineMemOperands aware of scalable vectors.
+ assert(memvt.getStoreSize().getKnownMinSize() <= MMO->getSize() &&
+ "Size mismatch!");
}
/// Profile - Gather unique data for the node.
@@ -9245,11 +9399,11 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
/// it cannot be inferred.
unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
- const GlobalValue *GV;
+ const GlobalValue *GV = nullptr;
int64_t GVOffset = 0;
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType());
- KnownBits Known(IdxWidth);
+ unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
+ KnownBits Known(PtrWidth);
llvm::computeKnownBits(GV, Known, getDataLayout());
unsigned AlignBits = Known.countMinTrailingZeros();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8c15563fcd23..728d963a916f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -27,11 +27,13 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -84,6 +86,8 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -722,7 +726,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
unsigned IntermediateNumElts = IntermediateVT.isVector() ?
IntermediateVT.getVectorNumElements() : 1;
- // Convert the vector to the appropiate type if necessary.
+ // Convert the vector to the appropriate type if necessary.
unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
EVT BuiltVectorTy = EVT::getVectorVT(
@@ -1021,6 +1025,8 @@ void SelectionDAGBuilder::clear() {
UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
+ PendingConstrainedFP.clear();
+ PendingConstrainedFPStrict.clear();
CurInst = nullptr;
HasTailCall = false;
SDNodeOrder = LowestSDNodeOrder;
@@ -1031,50 +1037,66 @@ void SelectionDAGBuilder::clearDanglingDebugInfo() {
DanglingDebugInfoMap.clear();
}
-SDValue SelectionDAGBuilder::getRoot() {
- if (PendingLoads.empty())
- return DAG.getRoot();
-
- if (PendingLoads.size() == 1) {
- SDValue Root = PendingLoads[0];
- DAG.setRoot(Root);
- PendingLoads.clear();
- return Root;
- }
-
- // Otherwise, we have to make a token factor node.
- SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads);
- PendingLoads.clear();
- DAG.setRoot(Root);
- return Root;
-}
-
-SDValue SelectionDAGBuilder::getControlRoot() {
+// Update DAG root to include dependencies on Pending chains.
+SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
SDValue Root = DAG.getRoot();
- if (PendingExports.empty())
+ if (Pending.empty())
return Root;
- // Turn all of the CopyToReg chains into one factored node.
+  // Add the current root to Pending, unless we already indirectly
+ // depend on it.
if (Root.getOpcode() != ISD::EntryToken) {
- unsigned i = 0, e = PendingExports.size();
+ unsigned i = 0, e = Pending.size();
for (; i != e; ++i) {
- assert(PendingExports[i].getNode()->getNumOperands() > 1);
- if (PendingExports[i].getNode()->getOperand(0) == Root)
+ assert(Pending[i].getNode()->getNumOperands() > 1);
+ if (Pending[i].getNode()->getOperand(0) == Root)
break; // Don't add the root if we already indirectly depend on it.
}
if (i == e)
- PendingExports.push_back(Root);
+ Pending.push_back(Root);
}
- Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
- PendingExports);
- PendingExports.clear();
+ if (Pending.size() == 1)
+ Root = Pending[0];
+ else
+ Root = DAG.getTokenFactor(getCurSDLoc(), Pending);
+
DAG.setRoot(Root);
+ Pending.clear();
return Root;
}
+SDValue SelectionDAGBuilder::getMemoryRoot() {
+ return updateRoot(PendingLoads);
+}
+
+SDValue SelectionDAGBuilder::getRoot() {
+ // Chain up all pending constrained intrinsics together with all
+ // pending loads, by simply appending them to PendingLoads and
+ // then calling getMemoryRoot().
+ PendingLoads.reserve(PendingLoads.size() +
+ PendingConstrainedFP.size() +
+ PendingConstrainedFPStrict.size());
+ PendingLoads.append(PendingConstrainedFP.begin(),
+ PendingConstrainedFP.end());
+ PendingLoads.append(PendingConstrainedFPStrict.begin(),
+ PendingConstrainedFPStrict.end());
+ PendingConstrainedFP.clear();
+ PendingConstrainedFPStrict.clear();
+ return getMemoryRoot();
+}
+
+SDValue SelectionDAGBuilder::getControlRoot() {
+ // We need to emit pending fpexcept.strict constrained intrinsics,
+ // so append them to the PendingExports list.
+ PendingExports.append(PendingConstrainedFPStrict.begin(),
+ PendingConstrainedFPStrict.end());
+ PendingConstrainedFPStrict.clear();
+ return updateRoot(PendingExports);
+}
+
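
The shared helper makes all three root getters instances of one pattern: gather the relevant pending chains, merge them with a TokenFactor, and install the result as the DAG root. A hedged sketch of that core step, assuming the chain values are in scope:

    // Chains with no ordering constraints among themselves are merged into
    // a single TokenFactor; later nodes depend on the merged root only.
    SmallVector<SDValue, 8> Pending = {LoadChain, ConstrainedFPChain};
    DAG.setRoot(DAG.getTokenFactor(DL, Pending));
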
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
if (I.isTerminator()) {
@@ -1104,6 +1126,15 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
Node->intersectFlagsWith(IncomingFlags);
}
}
+ // Constrained FP intrinsics with fpexcept.ignore should also get
+ // the NoFPExcept flag.
+ if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(&I))
+ if (FPI->getExceptionBehavior() == fp::ExceptionBehavior::ebIgnore)
+ if (SDNode *Node = getNodeForIRValue(&I)) {
+ SDNodeFlags Flags = Node->getFlags();
+ Flags.setNoFPExcept(true);
+ Node->setFlags(Flags);
+ }
if (!I.isTerminator() && !HasTailCall &&
!isStatepoint(&I)) // statepoints handle their exports internally
@@ -2746,8 +2777,9 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
- assert(!I.hasOperandBundlesOtherThan(
- {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
+ LLVMContext::OB_funclet,
+ LLVMContext::OB_cfguardtarget}) &&
"Cannot lower invokes with arbitrary operand bundles yet!");
const Value *Callee(I.getCalledValue());
@@ -3033,7 +3065,7 @@ static bool isVectorReductionOp(const User *I) {
if (!Visited.insert(User).second)
continue;
- for (const auto &U : User->users()) {
+ for (const auto *U : User->users()) {
auto Inst = dyn_cast<Instruction>(U);
if (!Inst)
return false;
@@ -3119,6 +3151,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
if (isVectorReductionOp(&I)) {
Flags.setVectorReduction(true);
LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
+
+    // If no flags are set, we will propagate the incoming flags; if any flags
+    // are set, we will intersect them with the incoming flags, so we need to
+    // copy the FMF flags here.
+ if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
+ Flags.copyFMF(*FPOp);
+ }
}
SDValue Op1 = getValue(I.getOperand(0));
@@ -4039,9 +4078,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue Root;
bool ConstantMemory = false;
- if (isVolatile || NumValues > MaxParallelChains)
+ if (isVolatile)
// Serialize volatile loads with other side effects.
Root = getRoot();
+ else if (NumValues > MaxParallelChains)
+ Root = getMemoryRoot();
else if (AA &&
AA->pointsToConstantMemory(MemoryLocation(
SV,
@@ -4216,10 +4257,9 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SDValue Src = getValue(SrcV);
SDValue Ptr = getValue(PtrV);
- SDValue Root = getRoot();
+ SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
- EVT PtrVT = Ptr.getValueType();
unsigned Alignment = I.getAlignment();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
@@ -4245,8 +4285,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
Root = Chain;
ChainI = 0;
}
- SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
- DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
+ SDValue Add = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags);
SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
@@ -4292,6 +4331,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
SDValue Mask = getValue(MaskOperand);
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
if (!Alignment)
@@ -4303,11 +4343,14 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
MachineMemOperand *MMO =
DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(PtrOperand),
- MachineMemOperand::MOStore, VT.getStoreSize(),
+ MachineMemOperand::MOStore,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ VT.getStoreSize().getKnownMinSize(),
Alignment, AAInfo);
- SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
- MMO, false /* Truncating */,
- IsCompressing);
+ SDValue StoreNode =
+ DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
+ ISD::UNINDEXED, false /* Truncating */, IsCompressing);
DAG.setRoot(StoreNode);
setValue(&I, StoreNode);
}
@@ -4346,9 +4389,10 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
unsigned FinalIndex = GEP->getNumOperands() - 1;
Value *IndexVal = GEP->getOperand(FinalIndex);
+ gep_type_iterator GTI = gep_type_begin(*GEP);
// Ensure all the other indices are 0.
- for (unsigned i = 1; i < FinalIndex; ++i) {
+ for (unsigned i = 1; i < FinalIndex; ++i, ++GTI) {
auto *C = dyn_cast<Constant>(GEP->getOperand(i));
if (!C)
return false;
@@ -4361,18 +4405,39 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
// The operands of the GEP may be defined in another basic block.
// In this case we'll not find nodes for the operands.
- if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
+ if (!SDB->findValue(Ptr))
+ return false;
+ Constant *C = dyn_cast<Constant>(IndexVal);
+ if (!C && !SDB->findValue(IndexVal))
return false;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
- Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
- SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ StructType *STy = GTI.getStructTypeOrNull();
+
+ if (STy) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ if (isa<VectorType>(C->getType())) {
+ C = C->getSplatValue();
+      // FIXME: Can getSplatValue return nullptr for a structure?
+ // If not, the following check can be removed.
+ if (!C)
+ return false;
+ }
+ auto *CI = cast<ConstantInt>(C);
+ Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ Index = DAG.getConstant(SL->getElementOffset(CI->getZExtValue()),
+ SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ } else {
+ Scale = DAG.getTargetConstant(
+ DL.getTypeAllocSize(GEP->getResultElementType()),
+ SDB->getCurSDLoc(), TLI.getPointerTy(DL));
+ Index = SDB->getValue(IndexVal);
+ }
Base = SDB->getValue(Ptr);
- Index = SDB->getValue(IndexVal);
IndexType = ISD::SIGNED_SCALED;
- if (!Index.getValueType().isVector()) {
+ if (STy || !Index.getValueType().isVector()) {
unsigned GEPWidth = GEP->getType()->getVectorNumElements();
EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
@@ -4383,7 +4448,7 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
- // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask)
+ // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
const Value *Ptr = I.getArgOperand(1);
SDValue Src0 = getValue(I.getArgOperand(0));
SDValue Mask = getValue(I.getArgOperand(3));
@@ -4407,7 +4472,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
- MachineMemOperand::MOStore, VT.getStoreSize(),
+ MachineMemOperand::MOStore,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ VT.getStoreSize().getKnownMinSize(),
Alignment, AAInfo);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
@@ -4415,7 +4483,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
- SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
+ SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
Ops, MMO, IndexType);
DAG.setRoot(Scatter);
@@ -4452,6 +4520,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
SDValue Mask = getValue(MaskOperand);
+ SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
if (!Alignment)
@@ -4462,22 +4531,29 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
- bool AddToChain =
- !AA || !AA->pointsToConstantMemory(MemoryLocation(
- PtrOperand,
- LocationSize::precise(
- DAG.getDataLayout().getTypeStoreSize(I.getType())),
- AAInfo));
+ MemoryLocation ML;
+ if (VT.isScalableVector())
+ ML = MemoryLocation(PtrOperand);
+ else
+ ML = MemoryLocation(PtrOperand, LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(I.getType())),
+ AAInfo);
+ bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO =
DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(PtrOperand),
- MachineMemOperand::MOLoad, VT.getStoreSize(),
+ MachineMemOperand::MOLoad,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ VT.getStoreSize().getKnownMinSize(),
Alignment, AAInfo, Ranges);
- SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
- ISD::NON_EXTLOAD, IsExpanding);
+ SDValue Load =
+ DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
+ ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
if (AddToChain)
PendingLoads.push_back(Load.getValue(1));
setValue(&I, Load);
@@ -4524,7 +4600,10 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
MachineMemOperand *MMO =
DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
- MachineMemOperand::MOLoad, VT.getStoreSize(),
+ MachineMemOperand::MOLoad,
+ // TODO: Make MachineMemOperands aware of scalable
+ // vectors.
+ VT.getStoreSize().getKnownMinSize(),
Alignment, AAInfo, Ranges);
if (!UniformBase) {
@@ -4634,10 +4713,10 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Ops[3];
Ops[0] = getRoot();
- Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
- TLI.getFenceOperandTy(DAG.getDataLayout()));
- Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
- TLI.getFenceOperandTy(DAG.getDataLayout()));
+ Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl,
+ TLI.getFenceOperandTy(DAG.getDataLayout()));
+ Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl,
+ TLI.getFenceOperandTy(DAG.getDataLayout()));
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
@@ -5344,8 +5423,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
if (Val == 0)
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
- const Function &F = DAG.getMachineFunction().getFunction();
- if (!F.hasOptSize() ||
+ bool OptForSize = DAG.shouldOptForSize();
+ if (!OptForSize ||
// If optimizing for size, don't insert too many multiplies.
// This inserts up to 5 multiplies.
countPopulation(Val) + Log2_32(Val) < 7) {
@@ -5382,6 +5461,60 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
+static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
+ SDValue LHS, SDValue RHS, SDValue Scale,
+ SelectionDAG &DAG, const TargetLowering &TLI) {
+ EVT VT = LHS.getValueType();
+ bool Signed = Opcode == ISD::SDIVFIX;
+ LLVMContext &Ctx = *DAG.getContext();
+
+ // If the type is legal but the operation isn't, this node might survive all
+ // the way to operation legalization. If we end up there and we do not have
+ // the ability to widen the type (if VT*2 is not legal), we cannot expand the
+ // node.
+
+ // Coax the legalizer into expanding the node during type legalization instead
+ // by bumping the size by one bit. This will force it to Promote, enabling the
+ // early expansion and avoiding the need to expand later.
+
+ // We don't have to do this if Scale is 0; that can always be expanded.
+
+ // FIXME: We wouldn't have to do this (or any of the early
+ // expansion/promotion) if it was possible to expand a libcall of an
+ // illegal type during operation legalization. But it's not, so things
+ // get a bit hacky.
+ unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
+ if (ScaleInt > 0 &&
+ (TLI.isTypeLegal(VT) ||
+ (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
+ TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
+ Opcode, VT, ScaleInt);
+ if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) {
+ EVT PromVT;
+ if (VT.isScalarInteger())
+ PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1);
+ else if (VT.isVector()) {
+ PromVT = VT.getVectorElementType();
+ PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1);
+ PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount());
+ } else
+ llvm_unreachable("Wrong VT for DIVFIX?");
+ if (Signed) {
+ LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT);
+ RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT);
+ } else {
+ LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
+ RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
+ }
+ // TODO: Saturation.
+ SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
+ return DAG.getZExtOrTrunc(Res, DL, VT);
+ }
+ }
+
+ return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale);
+}
+
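
The widening trick is easiest to see on the arithmetic itself: a fixed-point divide pre-shifts the dividend by the scale, so the intermediate needs more bits than the operand type. A standalone unsigned Q-format example in plain C++ (illustration only; the code above instead promotes the type so legalization expands the node early):

    #include <cassert>
    #include <cstdint>

    // Unsigned fixed-point divide with Scale fractional bits:
    // returns (A / B) in the same Q-format, i.e. (A << Scale) / B.
    uint32_t udivFix(uint32_t A, uint32_t B, unsigned Scale) {
      uint64_t Wide = (uint64_t)A << Scale; // widen so the shift cannot wrap
      return (uint32_t)(Wide / B);
    }

    int main() {
      // Q16.16: 1.0 == 0x10000, so 3.0 / 2.0 == 1.5 == 0x18000.
      assert(udivFix(3u << 16, 2u << 16, 16) == 0x18000);
      return 0;
    }
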
// getUnderlyingArgRegs - Find underlying registers used for a truncated,
// bitcasted, or split argument. Returns a list of <Register, size in bits>
static void
@@ -5474,7 +5607,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
// is an argument. But since we have already used %a1 to describe a parameter
// we should not handle that last dbg.value here (that would result in an
// incorrect hoisting of the DBG_VALUE to the function entry).
- // Notice that we allow one dbg.value per IR level argument, to accomodate
+ // Notice that we allow one dbg.value per IR level argument, to accommodate
// for the situation with fragments above.
if (VariableIsFunctionInputArg) {
unsigned ArgNo = Arg->getArgNo();
@@ -5489,7 +5622,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
- bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
int FI = FuncInfo.getArgumentFrameIndex(Arg);
@@ -5511,7 +5643,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
if (Reg) {
Op = MachineOperand::CreateReg(Reg, false);
- IsIndirect = IsDbgDeclare;
}
}
@@ -5530,15 +5661,38 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
= [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) {
unsigned Offset = 0;
for (auto RegAndSize : SplitRegs) {
+ // If the expression is already a fragment, the current register
+ // offset+size might extend beyond the fragment. In this case, only
+ // the register bits that are inside the fragment are relevant.
+ int RegFragmentSizeInBits = RegAndSize.second;
+ if (auto ExprFragmentInfo = Expr->getFragmentInfo()) {
+ uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits;
+ // The register is entirely outside the expression fragment,
+ // so is irrelevant for debug info.
+ if (Offset >= ExprFragmentSizeInBits)
+ break;
+ // The register is partially outside the expression fragment, only
+ // the low bits within the fragment are relevant for debug info.
+ if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) {
+ RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset;
+ }
+ }
+
auto FragmentExpr = DIExpression::createFragmentExpression(
- Expr, Offset, RegAndSize.second);
- if (!FragmentExpr)
+ Expr, Offset, RegFragmentSizeInBits);
+ Offset += RegAndSize.second;
+ // If a valid fragment expression cannot be created, the variable's
+ // correct value cannot be determined and so it is set as Undef.
+ if (!FragmentExpr) {
+ SDDbgValue *SDV = DAG.getConstantDbgValue(
+ Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
continue;
+ }
assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?");
FuncInfo.ArgDbgValues.push_back(
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
RegAndSize.first, Variable, *FragmentExpr));
- Offset += RegAndSize.second;
}
};
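
The clipping arithmetic above reduces to intersecting the register piece [Offset, Offset + RegSize) with the expression fragment [0, FragSize). A small runnable check of the three cases the comments describe:

    #include <algorithm>
    #include <cassert>

    // Bits of a register piece that fall inside an expression fragment.
    unsigned clippedSize(unsigned Offset, unsigned RegSize, unsigned FragSize) {
      if (Offset >= FragSize)
        return 0;                              // entirely outside
      return std::min(RegSize, FragSize - Offset);
    }

    int main() {
      assert(clippedSize(0, 32, 48) == 32);  // fully inside the fragment
      assert(clippedSize(32, 32, 48) == 16); // partially outside: keep low bits
      assert(clippedSize(64, 32, 48) == 0);  // irrelevant for debug info
      return 0;
    }
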
@@ -5555,7 +5709,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
Op = MachineOperand::CreateReg(VMI->second, false);
- IsIndirect = IsDbgDeclare;
} else if (ArgRegsAndSizes.size() > 1) {
// This was split due to the calling convention, and no virtual register
// mapping exists for the value.
@@ -5569,9 +5722,26 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- IsIndirect = (Op->isReg()) ? IsIndirect : true;
- if (IsIndirect)
+
+ // If the argument arrives in a stack slot, then what the IR thought was a
+ // normal Value is actually in memory, and we must add a deref to load it.
+ if (Op->isFI()) {
+ int FI = Op->getIndex();
+ unsigned Size = DAG.getMachineFunction().getFrameInfo().getObjectSize(FI);
+ if (Expr->isImplicit()) {
+ SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
+ Expr = DIExpression::prependOpcodes(Expr, Ops);
+ } else {
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
+ }
+ }
+
+ // If this location was specified with a dbg.declare, then it and its
+ // expression calculate the address of the variable. Append a deref to
+ // force it to be a memory location.
+ if (IsDbgDeclare)
Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+
FuncInfo.ArgDbgValues.push_back(
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
*Op, Variable, Expr));
@@ -5603,20 +5773,20 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
/*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
-// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp) && \
- !defined(setjmp_undefined_for_msvc)
-# pragma push_macro("setjmp")
-# undef setjmp
-# define setjmp_undefined_for_msvc
-#endif
-
static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
switch (Intrinsic) {
case Intrinsic::smul_fix:
return ISD::SMULFIX;
case Intrinsic::umul_fix:
return ISD::UMULFIX;
+ case Intrinsic::smul_fix_sat:
+ return ISD::SMULFIXSAT;
+ case Intrinsic::umul_fix_sat:
+ return ISD::UMULFIXSAT;
+ case Intrinsic::sdiv_fix:
+ return ISD::SDIVFIX;
+ case Intrinsic::udiv_fix:
+ return ISD::UDIVFIX;
default:
llvm_unreachable("Unhandled fixed point intrinsic");
}
@@ -5687,12 +5857,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
RegName, getValue(RegValue)));
return;
}
- case Intrinsic::setjmp:
- lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]);
- return;
- case Intrinsic::longjmp:
- lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]);
- return;
case Intrinsic::memcpy: {
const auto &MCI = cast<MemCpyInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
@@ -5706,7 +5870,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
- SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol,
false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
@@ -5722,7 +5887,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1);
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
- SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Align, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)));
updateDAGForMaybeTailCall(MS);
return;
@@ -5740,7 +5906,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
- SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Align, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MM);
@@ -6102,44 +6269,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
return;
- case Intrinsic::experimental_constrained_fadd:
- case Intrinsic::experimental_constrained_fsub:
- case Intrinsic::experimental_constrained_fmul:
- case Intrinsic::experimental_constrained_fdiv:
- case Intrinsic::experimental_constrained_frem:
- case Intrinsic::experimental_constrained_fma:
- case Intrinsic::experimental_constrained_fptosi:
- case Intrinsic::experimental_constrained_fptoui:
- case Intrinsic::experimental_constrained_fptrunc:
- case Intrinsic::experimental_constrained_fpext:
- case Intrinsic::experimental_constrained_sqrt:
- case Intrinsic::experimental_constrained_pow:
- case Intrinsic::experimental_constrained_powi:
- case Intrinsic::experimental_constrained_sin:
- case Intrinsic::experimental_constrained_cos:
- case Intrinsic::experimental_constrained_exp:
- case Intrinsic::experimental_constrained_exp2:
- case Intrinsic::experimental_constrained_log:
- case Intrinsic::experimental_constrained_log10:
- case Intrinsic::experimental_constrained_log2:
- case Intrinsic::experimental_constrained_lrint:
- case Intrinsic::experimental_constrained_llrint:
- case Intrinsic::experimental_constrained_rint:
- case Intrinsic::experimental_constrained_nearbyint:
- case Intrinsic::experimental_constrained_maxnum:
- case Intrinsic::experimental_constrained_minnum:
- case Intrinsic::experimental_constrained_ceil:
- case Intrinsic::experimental_constrained_floor:
- case Intrinsic::experimental_constrained_lround:
- case Intrinsic::experimental_constrained_llround:
- case Intrinsic::experimental_constrained_round:
- case Intrinsic::experimental_constrained_trunc:
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case Intrinsic::INTRINSIC:
+#include "llvm/IR/ConstrainedOps.def"
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI.isFMAFasterThanFMulAndFAdd(VT)) {
+ TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
@@ -6307,7 +6445,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
}
case Intrinsic::smul_fix:
- case Intrinsic::umul_fix: {
+ case Intrinsic::umul_fix:
+ case Intrinsic::smul_fix_sat:
+ case Intrinsic::umul_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
@@ -6315,20 +6455,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Op1.getValueType(), Op1, Op2, Op3));
return;
}
- case Intrinsic::smul_fix_sat: {
+ case Intrinsic::sdiv_fix:
+ case Intrinsic::udiv_fix: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
- setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
- Op3));
- return;
- }
- case Intrinsic::umul_fix_sat: {
- SDValue Op1 = getValue(I.getArgOperand(0));
- SDValue Op2 = getValue(I.getArgOperand(1));
- SDValue Op3 = getValue(I.getArgOperand(2));
- setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
- Op3));
+ setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
+ Op1, Op2, Op3, DAG, TLI));
return;
}
case Intrinsic::stacksave: {
@@ -6681,7 +6814,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// Add the offset to the FP.
Value *FP = I.getArgOperand(1);
SDValue FPVal = getValue(FP);
- SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
+ SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl);
setValue(&I, Add);
return;
@@ -6876,142 +7009,82 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
const ConstrainedFPIntrinsic &FPI) {
SDLoc sdl = getCurSDLoc();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
+ ValueVTs.push_back(MVT::Other); // Out chain
+
+ // We do not need to serialize constrained FP intrinsics against
+ // each other or against (nonvolatile) loads, so they can be
+ // chained like loads.
+ SDValue Chain = DAG.getRoot();
+ SmallVector<SDValue, 4> Opers;
+ Opers.push_back(Chain);
+ if (FPI.isUnaryOp()) {
+ Opers.push_back(getValue(FPI.getArgOperand(0)));
+ } else if (FPI.isTernaryOp()) {
+ Opers.push_back(getValue(FPI.getArgOperand(0)));
+ Opers.push_back(getValue(FPI.getArgOperand(1)));
+ Opers.push_back(getValue(FPI.getArgOperand(2)));
+ } else {
+ Opers.push_back(getValue(FPI.getArgOperand(0)));
+ Opers.push_back(getValue(FPI.getArgOperand(1)));
+ }
+
unsigned Opcode;
switch (FPI.getIntrinsicID()) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::experimental_constrained_fadd:
- Opcode = ISD::STRICT_FADD;
- break;
- case Intrinsic::experimental_constrained_fsub:
- Opcode = ISD::STRICT_FSUB;
- break;
- case Intrinsic::experimental_constrained_fmul:
- Opcode = ISD::STRICT_FMUL;
- break;
- case Intrinsic::experimental_constrained_fdiv:
- Opcode = ISD::STRICT_FDIV;
- break;
- case Intrinsic::experimental_constrained_frem:
- Opcode = ISD::STRICT_FREM;
- break;
- case Intrinsic::experimental_constrained_fma:
- Opcode = ISD::STRICT_FMA;
- break;
- case Intrinsic::experimental_constrained_fptosi:
- Opcode = ISD::STRICT_FP_TO_SINT;
- break;
- case Intrinsic::experimental_constrained_fptoui:
- Opcode = ISD::STRICT_FP_TO_UINT;
- break;
- case Intrinsic::experimental_constrained_fptrunc:
- Opcode = ISD::STRICT_FP_ROUND;
- break;
- case Intrinsic::experimental_constrained_fpext:
- Opcode = ISD::STRICT_FP_EXTEND;
- break;
- case Intrinsic::experimental_constrained_sqrt:
- Opcode = ISD::STRICT_FSQRT;
- break;
- case Intrinsic::experimental_constrained_pow:
- Opcode = ISD::STRICT_FPOW;
- break;
- case Intrinsic::experimental_constrained_powi:
- Opcode = ISD::STRICT_FPOWI;
- break;
- case Intrinsic::experimental_constrained_sin:
- Opcode = ISD::STRICT_FSIN;
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ case Intrinsic::INTRINSIC: \
+ Opcode = ISD::STRICT_##DAGN; \
break;
- case Intrinsic::experimental_constrained_cos:
- Opcode = ISD::STRICT_FCOS;
- break;
- case Intrinsic::experimental_constrained_exp:
- Opcode = ISD::STRICT_FEXP;
- break;
- case Intrinsic::experimental_constrained_exp2:
- Opcode = ISD::STRICT_FEXP2;
- break;
- case Intrinsic::experimental_constrained_log:
- Opcode = ISD::STRICT_FLOG;
- break;
- case Intrinsic::experimental_constrained_log10:
- Opcode = ISD::STRICT_FLOG10;
- break;
- case Intrinsic::experimental_constrained_log2:
- Opcode = ISD::STRICT_FLOG2;
- break;
- case Intrinsic::experimental_constrained_lrint:
- Opcode = ISD::STRICT_LRINT;
- break;
- case Intrinsic::experimental_constrained_llrint:
- Opcode = ISD::STRICT_LLRINT;
- break;
- case Intrinsic::experimental_constrained_rint:
- Opcode = ISD::STRICT_FRINT;
- break;
- case Intrinsic::experimental_constrained_nearbyint:
- Opcode = ISD::STRICT_FNEARBYINT;
- break;
- case Intrinsic::experimental_constrained_maxnum:
- Opcode = ISD::STRICT_FMAXNUM;
- break;
- case Intrinsic::experimental_constrained_minnum:
- Opcode = ISD::STRICT_FMINNUM;
- break;
- case Intrinsic::experimental_constrained_ceil:
- Opcode = ISD::STRICT_FCEIL;
- break;
- case Intrinsic::experimental_constrained_floor:
- Opcode = ISD::STRICT_FFLOOR;
- break;
- case Intrinsic::experimental_constrained_lround:
- Opcode = ISD::STRICT_LROUND;
- break;
- case Intrinsic::experimental_constrained_llround:
- Opcode = ISD::STRICT_LLROUND;
- break;
- case Intrinsic::experimental_constrained_round:
- Opcode = ISD::STRICT_FROUND;
+#include "llvm/IR/ConstrainedOps.def"
+ }
+
+ // A few strict DAG nodes carry additional operands that are not
+ // set up by the default code above.
+ switch (Opcode) {
+ default: break;
+ case ISD::STRICT_FP_ROUND:
+ Opers.push_back(
+ DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
break;
- case Intrinsic::experimental_constrained_trunc:
- Opcode = ISD::STRICT_FTRUNC;
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS: {
+ auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
+ Opers.push_back(DAG.getCondCode(getFCmpCondCode(FPCmp->getPredicate())));
break;
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue Chain = getRoot();
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
- ValueVTs.push_back(MVT::Other); // Out chain
+ }
SDVTList VTs = DAG.getVTList(ValueVTs);
- SDValue Result;
- if (Opcode == ISD::STRICT_FP_ROUND)
- Result = DAG.getNode(Opcode, sdl, VTs,
- { Chain, getValue(FPI.getArgOperand(0)),
- DAG.getTargetConstant(0, sdl,
- TLI.getPointerTy(DAG.getDataLayout())) });
- else if (FPI.isUnaryOp())
- Result = DAG.getNode(Opcode, sdl, VTs,
- { Chain, getValue(FPI.getArgOperand(0)) });
- else if (FPI.isTernaryOp())
- Result = DAG.getNode(Opcode, sdl, VTs,
- { Chain, getValue(FPI.getArgOperand(0)),
- getValue(FPI.getArgOperand(1)),
- getValue(FPI.getArgOperand(2)) });
- else
- Result = DAG.getNode(Opcode, sdl, VTs,
- { Chain, getValue(FPI.getArgOperand(0)),
- getValue(FPI.getArgOperand(1)) });
-
- if (FPI.getExceptionBehavior() !=
- ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) {
- SDNodeFlags Flags;
- Flags.setFPExcept(true);
- Result->setFlags(Flags);
- }
+ SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers);
assert(Result.getNode()->getNumValues() == 2);
+
+ // Push node to the appropriate list so that future instructions can be
+ // chained up correctly.
SDValue OutChain = Result.getValue(1);
- DAG.setRoot(OutChain);
+ switch (FPI.getExceptionBehavior().getValue()) {
+ case fp::ExceptionBehavior::ebIgnore:
+ // The only reason why ebIgnore nodes still need to be chained is that
+ // they might depend on the current rounding mode, and therefore must
+      // not be moved across instructions that may change that mode.
+ LLVM_FALLTHROUGH;
+ case fp::ExceptionBehavior::ebMayTrap:
+ // These must not be moved across calls or instructions that may change
+ // floating-point exception masks.
+ PendingConstrainedFP.push_back(OutChain);
+ break;
+ case fp::ExceptionBehavior::ebStrict:
+ // These must not be moved across calls or instructions that may change
+ // floating-point exception masks or read floating-point exception flags.
+ // In addition, they cannot be optimized out even if unused.
+ PendingConstrainedFPStrict.push_back(OutChain);
+ break;
+ }
+
SDValue FPResult = Result.getValue(0);
setValue(&FPI, FPResult);
}
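
The hand-written intrinsic-to-opcode switch removed above is now generated from the ConstrainedOps.def include near the top of this hunk. As a rough sketch (macro shape assumed from the def file's conventions, not quoted from it), the expansion amounts to:

#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)                   \
  case Intrinsic::INTRINSIC:                                                   \
    Opcode = ISD::STRICT_##DAGN;                                               \
    break;
#include "llvm/IR/ConstrainedOps.def"

// which generates, for example:
//   case Intrinsic::experimental_constrained_fadd:
//     Opcode = ISD::STRICT_FADD;
//     break;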
@@ -7102,13 +7175,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
const Value *SwiftErrorVal = nullptr;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // We can't tail call inside a function with a swifterror argument. Lowering
- // does not support this yet. It would have to move into the swifterror
- // register before the call.
- auto *Caller = CS.getInstruction()->getParent()->getParent();
- if (TLI.supportSwiftError() &&
- Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
- isTailCall = false;
+ if (isTailCall) {
+ // Avoid emitting tail calls in functions with the disable-tail-calls
+ // attribute.
+ auto *Caller = CS.getInstruction()->getParent()->getParent();
+ if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
+ "true")
+ isTailCall = false;
+
+ // We can't tail call inside a function with a swifterror argument. Lowering
+ // does not support this yet. It would have to move into the swifterror
+ // register before the call.
+ if (TLI.supportSwiftError() &&
+ Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ isTailCall = false;
+ }
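
For reference, a minimal sketch of how a function acquires this string attribute (typically set by the frontend or driver; F is any llvm::Function):

F.addFnAttr("disable-tail-calls", "true");
// The check above then reads it back as a string:
bool Disabled =
    F.getFnAttribute("disable-tail-calls").getValueAsString() == "true";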
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
@@ -7142,6 +7223,18 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
isTailCall = false;
}
+ // If call site has a cfguardtarget operand bundle, create and add an
+ // additional ArgListEntry.
+ if (auto Bundle = CS.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
+ TargetLowering::ArgListEntry Entry;
+ Value *V = Bundle->Inputs[0];
+ SDValue ArgNode = getValue(V);
+ Entry.Node = ArgNode;
+ Entry.Ty = V->getType();
+ Entry.IsCFGuardTarget = true;
+ Args.push_back(Entry);
+ }
+
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI->LowerCallTo.
if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
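
A hedged sketch of the producer side of the cfguardtarget bundle handled above (identifiers are hypothetical; in practice the CFGuard IR pass attaches the bundle when it rewrites indirect calls through the guard check function):

// Rewrite an indirect call through the guard check function, keeping the
// original callee as a "cfguardtarget" operand bundle for lowering.
OperandBundleDef TargetBundle("cfguardtarget", {OriginalCallee});
CallInst *Checked =
    CallInst::Create(CheckFnTy, GuardCheckFn, CallArgs, {TargetBundle});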
@@ -7374,7 +7467,8 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// In the mempcpy context we need to pass in a false value for isTailCall
// because the return pointer needs to be adjusted by the size of
// the copied memory.
- SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol,
+ SDValue Root = isVol ? getRoot() : getMemoryRoot();
+ SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Align, isVol,
false, /*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
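
The choice of Root here relies on the new split between the memory root and the full root; a plausible sketch of the two helpers, consistent with the header comments later in this diff (details assumed):

SDValue SelectionDAGBuilder::getMemoryRoot() {
  // Orders only against pending loads; enough for a non-volatile memcpy.
  return updateRoot(PendingLoads);
}

SDValue SelectionDAGBuilder::getRoot() {
  // Also chain up pending constrained-FP nodes before flushing.
  PendingLoads.reserve(PendingLoads.size() + PendingConstrainedFP.size() +
                       PendingConstrainedFPStrict.size());
  PendingLoads.append(PendingConstrainedFP.begin(),
                      PendingConstrainedFP.end());
  PendingLoads.append(PendingConstrainedFPStrict.begin(),
                      PendingConstrainedFPStrict.end());
  PendingConstrainedFP.clear();
  PendingConstrainedFPStrict.clear();
  return getMemoryRoot();
}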
@@ -7683,8 +7777,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
- assert(!I.hasOperandBundlesOtherThan(
- {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ // CFGuardTarget bundles are lowered in LowerCallTo.
+ assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
+ LLVMContext::OB_funclet,
+ LLVMContext::OB_cfguardtarget}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledValue());
@@ -8182,10 +8278,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
switch (OpInfo.Type) {
case InlineAsm::isOutput:
- if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
- ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
- OpInfo.ConstraintType == TargetLowering::C_Other) &&
- OpInfo.isIndirect)) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
@@ -8197,12 +8290,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
- break;
- } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
- OpInfo.ConstraintType == TargetLowering::C_Other) &&
- !OpInfo.isIndirect) ||
- OpInfo.ConstraintType == TargetLowering::C_Register ||
- OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
+ } else {
// Otherwise, this outputs to a register (directly for C_Register /
// C_RegisterClass, and in a target-defined fashion for
// C_Immediate/C_Other). Find a register that we can use.
@@ -8285,8 +8373,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Treat indirect 'X' constraint as memory.
- if ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
- OpInfo.ConstraintType == TargetLowering::C_Other) &&
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
@@ -8339,8 +8426,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
- OpInfo.ConstraintType == TargetLowering::C_Register ||
- OpInfo.ConstraintType == TargetLowering::C_Immediate) &&
+ OpInfo.ConstraintType == TargetLowering::C_Register) &&
"Unknown constraint type!");
// TODO: Support this.
@@ -8678,7 +8764,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
Callee = getValue(CI.getCalledValue());
NullPtr = DAG.getIntPtrConstant(0, DL, true);
- // The stackmap intrinsic only records the live variables (the arguemnts
+ // The stackmap intrinsic only records the live variables (the arguments
// passed to it) and emits NOPS (if requested). Unlike the patchpoint
// intrinsic, this won't be lowered to a function call. This means we don't
// have to worry about calling conventions and target specific lowering code.
@@ -9027,6 +9113,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.IsReturned = false;
Entry.IsSwiftSelf = false;
Entry.IsSwiftError = false;
+ Entry.IsCFGuardTarget = false;
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.NumFixedArgs += 1;
@@ -9139,6 +9226,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setSwiftSelf();
if (Args[i].IsSwiftError)
Flags.setSwiftError();
+ if (Args[i].IsCFGuardTarget)
+ Flags.setCFGuardTarget();
if (Args[i].IsByVal)
Flags.setByVal();
if (Args[i].IsInAlloca) {
@@ -9214,9 +9303,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
for (unsigned j = 0; j != NumParts; ++j) {
// If it isn't the first piece, the alignment must be 1.
+ // For scalable vectors the scalable part is currently handled
+ // by individual targets, so we just use the known minimum size here.
ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
- i < CLI.NumFixedArgs,
- i, j*Parts[j].getValueType().getStoreSize());
+ i < CLI.NumFixedArgs, i,
+ j*Parts[j].getValueType().getStoreSize().getKnownMinSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
@@ -9487,7 +9578,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
/// Try to elide argument copies from memory into a local alloca. Succeeds if
/// ArgVal is a load from a suitable fixed stack object.
static void tryToElideArgumentCopy(
- FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
+ FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains,
DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
@@ -9507,9 +9598,9 @@ static void tryToElideArgumentCopy(
assert(ArgCopyIter != ArgCopyElisionCandidates.end());
const AllocaInst *AI = ArgCopyIter->second.first;
int FixedIndex = FINode->getIndex();
- int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
+ int &AllocaIndex = FuncInfo.StaticAllocaMap[AI];
int OldIndex = AllocaIndex;
- MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
+ MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
LLVM_DEBUG(
dbgs() << " argument copy elision failed due to bad fixed stack "
@@ -9518,7 +9609,7 @@ static void tryToElideArgumentCopy(
}
unsigned RequiredAlignment = AI->getAlignment();
if (!RequiredAlignment) {
- RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
+ RequiredAlignment = FuncInfo.MF->getDataLayout().getABITypeAlignment(
AI->getAllocatedType());
}
if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
@@ -9584,7 +9675,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// flag to ask the target to give us the memory location of that argument if
// available.
ArgCopyElisionMapTy ArgCopyElisionCandidates;
- findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
+ findArgumentCopyElisionCandidates(DL, FuncInfo.get(),
+ ArgCopyElisionCandidates);
// Set up the incoming argument description vector.
for (const Argument &Arg : F.args()) {
@@ -9685,8 +9777,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
unsigned NumRegs = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
+ // For scalable vectors, use the minimum size; individual targets
+ // are responsible for handling scalable vector arguments and
+ // return values.
ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
- ArgNo, PartBase+i*RegisterVT.getStoreSize());
+ ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// If it isn't the first piece, the alignment must be 1.
@@ -9699,7 +9794,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (NeedsRegBlock && Value == NumValues - 1)
Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
- PartBase += VT.getStoreSize();
+ PartBase += VT.getStoreSize().getKnownMinSize();
}
}
@@ -9769,7 +9864,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Elide the copying store if the target loaded this argument from a
// suitable fixed stack object.
if (Ins[i].Flags.isCopyElisionCandidate()) {
- tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
+ tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
InVals[i], ArgHasUses);
}
@@ -9795,7 +9890,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
unsigned NumParts = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
- // Even an apparant 'unused' swifterror argument needs to be returned. So
+ // Even an apparent 'unused' swifterror argument needs to be returned. So
// we do generate a copy for it that can be used on return from the
// function.
if (ArgHasUses || isSwiftErrorArg) {
@@ -10508,7 +10603,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
return;
}
- SL->findJumpTables(Clusters, &SI, DefaultMBB);
+ SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI());
SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
@@ -10557,3 +10652,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
}
}
+
+void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
+ SDValue N = getValue(I.getOperand(0));
+ setValue(&I, N);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index bfcf30b430b6..18e0edf7fc04 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -143,6 +143,20 @@ private:
/// tokenfactor for them just before terminator instructions.
SmallVector<SDValue, 8> PendingExports;
+ /// Similar to loads, nodes corresponding to constrained FP intrinsics are
+ /// bunched up and emitted when necessary. These can be moved across each
+ /// other and any (normal) memory operation (load or store), but not across
+ /// calls or instructions having unspecified side effects. As a special
+ /// case, constrained FP intrinsics using fpexcept.strict may not be deleted
+ /// even if otherwise unused, so they need to be chained before any
+ /// terminator instruction (like PendingExports). We track the latter
+ /// set of nodes in a separate list.
+ SmallVector<SDValue, 8> PendingConstrainedFP;
+ SmallVector<SDValue, 8> PendingConstrainedFPStrict;
+
+ /// Update root to include all chains from the Pending list.
+ SDValue updateRoot(SmallVectorImpl<SDValue> &Pending);
+
/// A unique monotonically increasing number used to order the SDNodes we
/// create.
unsigned SDNodeOrder;
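
A minimal sketch of what updateRoot plausibly does, mirroring the long-standing PendingLoads flushing logic (exact details assumed):

SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
  SDValue Root = DAG.getRoot();
  if (Pending.empty())
    return Root;

  // Add the current root unless a pending chain already depends on it.
  if (Root.getOpcode() != ISD::EntryToken &&
      llvm::none_of(Pending, [&](SDValue N) {
        return N.getNode()->getOperand(0) == Root;
      }))
    Pending.push_back(Root);

  // Merge all chains into one TokenFactor and make it the new root.
  SDValue NewRoot = Pending.size() == 1
                        ? Pending[0]
                        : DAG.getTokenFactor(getCurSDLoc(), Pending);
  DAG.setRoot(NewRoot);
  Pending.clear();
  return NewRoot;
}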
@@ -447,12 +461,18 @@ public:
/// Return the current virtual root of the Selection DAG, flushing any
/// PendingLoad items. This must be done before emitting a store or any other
- /// node that may need to be ordered after any prior load instructions.
+ /// memory node that may need to be ordered after any prior load instructions.
+ SDValue getMemoryRoot();
+
+ /// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict)
+ /// items. This must be done before emitting any call or any other node
+ /// that may need to be ordered after FP instructions due to other side
+ /// effects.
SDValue getRoot();
/// Similar to getRoot, but instead of flushing all the PendingLoad items,
- /// flush all the PendingExports items. It is necessary to do this before
- /// emitting a terminator instruction.
+ /// flush all the PendingExports (and PendingConstrainedFPStrict) items.
+ /// It is necessary to do this before emitting a terminator instruction.
SDValue getControlRoot();
SDLoc getCurSDLoc() const {
@@ -742,6 +762,7 @@ private:
void visitAtomicStore(const StoreInst &I);
void visitLoadFromSwiftError(const LoadInst &I);
void visitStoreToSwiftError(const StoreInst &I);
+ void visitFreeze(const FreezeInst &I);
void visitInlineAsm(ImmutableCallSite CS);
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index bc10f7621239..6fd71393bf38 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -186,7 +186,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FMINNUM_IEEE: return "fminnum_ieee";
case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee";
case ISD::FMINIMUM: return "fminimum";
+ case ISD::STRICT_FMINIMUM: return "strict_fminimum";
case ISD::FMAXIMUM: return "fmaximum";
+ case ISD::STRICT_FMAXIMUM: return "strict_fmaximum";
case ISD::FNEG: return "fneg";
case ISD::FSQRT: return "fsqrt";
case ISD::STRICT_FSQRT: return "strict_fsqrt";
@@ -270,6 +272,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FPOWI: return "strict_fpowi";
case ISD::SETCC: return "setcc";
case ISD::SETCCCARRY: return "setcccarry";
+ case ISD::STRICT_FSETCC: return "strict_fsetcc";
+ case ISD::STRICT_FSETCCS: return "strict_fsetccs";
case ISD::SELECT: return "select";
case ISD::VSELECT: return "vselect";
case ISD::SELECT_CC: return "select_cc";
@@ -308,6 +312,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UMULFIX: return "umulfix";
case ISD::UMULFIXSAT: return "umulfixsat";
+ case ISD::SDIVFIX: return "sdivfix";
+ case ISD::UDIVFIX: return "udivfix";
+
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
case ISD::ZERO_EXTEND: return "zero_extend";
@@ -324,7 +331,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::STRICT_SINT_TO_FP: return "strict_sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::STRICT_UINT_TO_FP: return "strict_uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
@@ -541,6 +550,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getFlags().hasVectorReduction())
OS << " vector-reduction";
+ if (getFlags().hasNoFPExcept())
+ OS << " nofpexcept";
+
if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
if (!MN->memoperands_empty()) {
OS << "<";
@@ -685,6 +697,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (doExt)
OS << " from " << MLd->getMemoryVT().getEVTString();
+ const char *AM = getIndexedModeName(MLd->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
if (MLd->isExpandingLoad())
OS << ", expanding";
@@ -696,6 +712,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (MSt->isTruncatingStore())
OS << ", trunc to " << MSt->getMemoryVT().getEVTString();
+ const char *AM = getIndexedModeName(MSt->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
if (MSt->isCompressingStore())
OS << ", compressing";
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1f07a241a824..6c57c72d47a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -27,8 +27,10 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/FastISel.h"
@@ -71,10 +73,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
@@ -147,17 +151,17 @@ static cl::opt<bool>
ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden,
cl::desc("Pop up a window to show dags before legalize types"));
static cl::opt<bool>
-ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
- cl::desc("Pop up a window to show dags before legalize"));
+ ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before the post "
+ "legalize types dag combine pass"));
+static cl::opt<bool>
+ ViewLegalizeDAGs("view-legalize-dags", cl::Hidden,
+ cl::desc("Pop up a window to show dags before legalize"));
static cl::opt<bool>
ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden,
cl::desc("Pop up a window to show dags before the second "
"dag combine pass"));
static cl::opt<bool>
-ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden,
- cl::desc("Pop up a window to show dags before the post legalize types"
- " dag combine pass"));
-static cl::opt<bool>
ViewISelDAGs("view-isel-dags", cl::Hidden,
cl::desc("Pop up a window to show isel dags as they are selected"));
static cl::opt<bool>
@@ -167,12 +171,10 @@ static cl::opt<bool>
ViewSUnitDAGs("view-sunit-dags", cl::Hidden,
cl::desc("Pop up a window to show SUnit dags after they are processed"));
#else
-static const bool ViewDAGCombine1 = false,
- ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false,
- ViewDAGCombine2 = false,
- ViewDAGCombineLT = false,
- ViewISelDAGs = false, ViewSchedDAGs = false,
- ViewSUnitDAGs = false;
+static const bool ViewDAGCombine1 = false, ViewLegalizeTypesDAGs = false,
+ ViewDAGCombineLT = false, ViewLegalizeDAGs = false,
+ ViewDAGCombine2 = false, ViewISelDAGs = false,
+ ViewSchedDAGs = false, ViewSUnitDAGs = false;
#endif
//===---------------------------------------------------------------------===//
@@ -305,28 +307,22 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
-SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
- CodeGenOpt::Level OL) :
- MachineFunctionPass(ID), TM(tm),
- FuncInfo(new FunctionLoweringInfo()),
- SwiftError(new SwiftErrorValueTracking()),
- CurDAG(new SelectionDAG(tm, OL)),
- SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, *SwiftError, OL)),
- AA(), GFI(),
- OptLevel(OL),
- DAGSize(0) {
- initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
- initializeBranchProbabilityInfoWrapperPassPass(
- *PassRegistry::getPassRegistry());
- initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
- initializeTargetLibraryInfoWrapperPassPass(
- *PassRegistry::getPassRegistry());
- }
+SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL)
+ : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()),
+ SwiftError(new SwiftErrorValueTracking()),
+ CurDAG(new SelectionDAG(tm, OL)),
+ SDB(std::make_unique<SelectionDAGBuilder>(*CurDAG, *FuncInfo, *SwiftError,
+ OL)),
+ AA(), GFI(), OptLevel(OL), DAGSize(0) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoWrapperPassPass(
+ *PassRegistry::getPassRegistry());
+ initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
SelectionDAGISel::~SelectionDAGISel() {
- delete SDB;
delete CurDAG;
- delete FuncInfo;
delete SwiftError;
}
@@ -340,6 +336,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -442,13 +440,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
CurDAG->init(*MF, *ORE, this, LibInfo,
- getAnalysisIfAvailable<LegacyDivergenceAnalysis>());
+ getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI);
FuncInfo->set(Fn, *MF, CurDAG);
SwiftError->setFunction(*MF);
@@ -735,23 +737,20 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
}
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
- SmallPtrSet<SDNode*, 16> VisitedNodes;
+ SmallPtrSet<SDNode *, 16> Added;
SmallVector<SDNode*, 128> Worklist;
Worklist.push_back(CurDAG->getRoot().getNode());
+ Added.insert(CurDAG->getRoot().getNode());
KnownBits Known;
do {
SDNode *N = Worklist.pop_back_val();
- // If we've already seen this node, ignore it.
- if (!VisitedNodes.insert(N).second)
- continue;
-
// Add all not-yet-visited chain operands to the worklist.
for (const SDValue &Op : N->op_values())
- if (Op.getValueType() == MVT::Other)
+ if (Op.getValueType() == MVT::Other && Added.insert(Op.getNode()).second)
Worklist.push_back(Op.getNode());
// If this is a CopyToReg with a vreg dest, process it.
@@ -793,8 +792,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
FuncInfo->MBB->getBasicBlock()->getName());
#endif
#ifdef NDEBUG
- if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
- ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+ if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewDAGCombineLT ||
+ ViewLegalizeDAGs || ViewDAGCombine2 || ViewISelDAGs || ViewSchedDAGs ||
ViewSUnitDAGs)
#endif
{
@@ -1159,10 +1158,30 @@ void SelectionDAGISel::DoInstructionSelection() {
// we convert them to normal FP opcodes instead at this point. This
// will allow them to be handled by existing target-specific instruction
// selectors.
- if (Node->isStrictFPOpcode() &&
- (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
- != TargetLowering::Legal))
- Node = CurDAG->mutateStrictFPToFP(Node);
+ if (!TLI->isStrictFPEnabled() && Node->isStrictFPOpcode()) {
+ // For some opcodes, we need to call TLI->getOperationAction using
+ // the first operand type instead of the result type. Note that this
+ // must match what SelectionDAGLegalize::LegalizeOp is doing.
+ EVT ActionVT;
+ switch (Node->getOpcode()) {
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP:
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ ActionVT = Node->getOperand(1).getValueType();
+ break;
+ default:
+ ActionVT = Node->getValueType(0);
+ break;
+ }
+ if (TLI->getOperationAction(Node->getOpcode(), ActionVT)
+ == TargetLowering::Expand)
+ Node = CurDAG->mutateStrictFPToFP(Node);
+ }
LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
Node->dump(CurDAG));
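
Targets that can select strict FP nodes directly opt out of this mutation via the isStrictFPEnabled() hook checked above; a hedged sketch (MyTargetLowering is hypothetical, the flag is a protected TargetLowering member):

// Otherwise, mutateStrictFPToFP turns e.g.
//   res, outchain = STRICT_FADD inchain, x, y
// into plain res = FADD x, y so existing patterns match.
MyTargetLowering::MyTargetLowering(const TargetMachine &TM)
    : TargetLowering(TM) {
  IsStrictFPEnabled = true; // strict nodes reach isel patterns intact
}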
@@ -1280,20 +1299,20 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
/// side-effect free and is either dead or folded into a generated instruction.
/// Return false if it needs to be emitted.
static bool isFoldedOrDeadInstruction(const Instruction *I,
- FunctionLoweringInfo *FuncInfo) {
+ const FunctionLoweringInfo &FuncInfo) {
return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded.
!I->isTerminator() && // Terminators aren't folded.
- !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
- !I->isEHPad() && // EH pad instructions aren't folded.
- !FuncInfo->isExportedInst(I); // Exported instrs must be computed.
+ !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded.
+ !I->isEHPad() && // EH pad instructions aren't folded.
+ !FuncInfo.isExportedInst(I); // Exported instrs must be computed.
}
/// Collect llvm.dbg.declare information. This is done after argument lowering
/// in case the declarations refer to arguments.
-static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
- MachineFunction *MF = FuncInfo->MF;
+static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
+ MachineFunction *MF = FuncInfo.MF;
const DataLayout &DL = MF->getDataLayout();
- for (const BasicBlock &BB : *FuncInfo->Fn) {
+ for (const BasicBlock &BB : *FuncInfo.Fn) {
for (const Instruction &I : BB) {
const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I);
if (!DI)
@@ -1315,11 +1334,11 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
// intrinsic and handle this during isel like dbg.value.
int FI = std::numeric_limits<int>::max();
if (const auto *AI = dyn_cast<AllocaInst>(Address)) {
- auto SI = FuncInfo->StaticAllocaMap.find(AI);
- if (SI != FuncInfo->StaticAllocaMap.end())
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
FI = SI->second;
} else if (const auto *Arg = dyn_cast<Argument>(Address))
- FI = FuncInfo->getArgumentFrameIndex(Arg);
+ FI = FuncInfo.getArgumentFrameIndex(Arg);
if (FI == std::numeric_limits<int>::max())
continue;
@@ -1353,7 +1372,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];
FuncInfo->InsertPt = FuncInfo->MBB->begin();
- CurDAG->setFunctionLoweringInfo(FuncInfo);
+ CurDAG->setFunctionLoweringInfo(FuncInfo.get());
if (!FastIS) {
LowerArguments(Fn);
@@ -1393,7 +1412,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (FastIS && Inserted)
FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
- processDbgDeclares(FuncInfo);
+ processDbgDeclares(*FuncInfo);
// Iterate over all basic blocks in the function.
StackProtector &SP = getAnalysis<StackProtector>();
@@ -1453,7 +1472,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
const Instruction *Inst = &*std::prev(BI);
// If we no longer require this instruction, skip it.
- if (isFoldedOrDeadInstruction(Inst, FuncInfo) ||
+ if (isFoldedOrDeadInstruction(Inst, *FuncInfo) ||
ElidedArgCopyInstrs.count(Inst)) {
--NumFastIselRemaining;
continue;
@@ -1473,7 +1492,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
const Instruction *BeforeInst = Inst;
while (BeforeInst != &*Begin) {
BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst));
- if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo))
+ if (!isFoldedOrDeadInstruction(BeforeInst, *FuncInfo))
break;
}
if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
@@ -1589,7 +1608,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// But if FastISel was run, we already selected some of the block.
// If we emitted a tail-call, we need to delete any previously emitted
// instruction that follows it.
- if (HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end())
+ if (FastIS && HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end())
FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end());
}
@@ -2230,10 +2249,13 @@ void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) {
void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
SDLoc dl(Op);
- MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
- const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1));
+ const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0));
+
+ EVT VT = Op->getValueType(0);
+ LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT();
Register Reg =
- TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0),
+ TLI->getRegisterByName(RegStr->getString().data(), Ty,
CurDAG->getMachineFunction());
SDValue New = CurDAG->getCopyFromReg(
Op->getOperand(0), dl, Reg, Op->getValueType(0));
@@ -2244,10 +2266,13 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) {
SDLoc dl(Op);
- MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
- const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
- Register Reg = TLI->getRegisterByName(RegStr->getString().data(),
- Op->getOperand(2).getValueType(),
+ MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1));
+ const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0));
+
+ EVT VT = Op->getOperand(2).getValueType();
+ LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT();
+
+ Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty,
CurDAG->getMachineFunction());
SDValue New = CurDAG->getCopyToReg(
Op->getOperand(0), dl, Reg, Op->getOperand(2));
@@ -3176,13 +3201,19 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case OPC_CheckFoldableChainNode: {
assert(NodeStack.size() != 1 && "No parent node");
// Verify that all intermediate nodes between the root and this one have
- // a single use.
+ // a single use (ignoring chains, which are handled in UpdateChains).
bool HasMultipleUses = false;
- for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
- if (!NodeStack[i].getNode()->hasOneUse()) {
- HasMultipleUses = true;
- break;
- }
+ for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) {
+ unsigned NNonChainUses = 0;
+ SDNode *NS = NodeStack[i].getNode();
+ for (auto UI = NS->use_begin(), UE = NS->use_end(); UI != UE; ++UI)
+ if (UI.getUse().getValueType() != MVT::Other)
+ if (++NNonChainUses > 1) {
+ HasMultipleUses = true;
+ break;
+ }
+ if (HasMultipleUses) break;
+ }
if (HasMultipleUses) break;
// Check to see that the target thinks this is profitable to fold and that
@@ -3433,6 +3464,17 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr)
Ops.push_back(InputGlue);
+ // Check whether any matched node could raise an FP exception. Since all
+ // such nodes must have a chain, it suffices to check ChainNodesMatched.
+ // We need to perform this check before potentially modifying one of the
+ // nodes via MorphNode.
+ bool MayRaiseFPException = false;
+ for (auto *N : ChainNodesMatched)
+ if (mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept()) {
+ MayRaiseFPException = true;
+ break;
+ }
+
// Create the node.
MachineSDNode *Res = nullptr;
bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo ||
@@ -3464,6 +3506,14 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
Ops, EmitNodeInfo));
}
+ // Set the NoFPExcept flag when no original matched node could
+ // raise an FP exception, but the new node potentially might.
+ if (!MayRaiseFPException && mayRaiseFPException(Res)) {
+ SDNodeFlags Flags = Res->getFlags();
+ Flags.setNoFPExcept(true);
+ Res->setFlags(Flags);
+ }
+
// If the node had chain/glue results, update our notion of the current
// chain and glue.
if (EmitNodeInfo & OPFL_GlueOutput) {
@@ -3619,6 +3669,21 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
}
}
+/// Return whether the node may raise an FP exception.
+bool SelectionDAGISel::mayRaiseFPException(SDNode *N) const {
+ // For machine opcodes, consult the MCID flag.
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ return MCID.mayRaiseFPException();
+ }
+
+ // For ISD opcodes, only StrictFP opcodes may raise an FP
+ // exception.
+ if (N->isTargetOpcode())
+ return N->isTargetStrictFPOpcode();
+ return N->isStrictFPOpcode();
+}
+
bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
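
Downstream machine passes can combine the MCID property consulted in mayRaiseFPException above with the per-node NoFPExcept flag set during selection; a hedged sketch:

// True when MI is known not to raise an FP exception: either its opcode
// never can, or isel proved this particular instance cannot.
static bool isFPExceptionSafe(const MachineInstr &MI) {
  return !MI.getDesc().mayRaiseFPException() ||
         MI.getFlag(MachineInstr::NoFPExcept);
}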
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index fad98b6f50dc..c628f379e415 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -384,7 +384,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// can consider allowing spills of smaller values to larger slots
// (i.e. change the '==' in the assert below to a '>=').
MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
- assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() &&
+ assert((MFI.getObjectSize(Index) * 8) ==
+ (int64_t)Incoming.getValueSizeInBits() &&
"Bad spill: stack slot does not match!");
// Note: Using the alignment of the spill slot (rather than the abi or
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9ab1324533f1..24ab65171a17 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -52,6 +52,10 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
SDValue &Chain) const {
const Function &F = DAG.getMachineFunction().getFunction();
+ // First, check if tail calls have been disabled in this function.
+ if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+ return false;
+
// Conservatively require the attributes of the call to match those of
// the return. Ignore NoAlias and NonNull because they don't affect the
// call sequence.
@@ -122,7 +126,11 @@ std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
ArrayRef<SDValue> Ops,
MakeLibCallOptions CallOptions,
- const SDLoc &dl) const {
+ const SDLoc &dl,
+ SDValue InChain) const {
+ if (!InChain)
+ InChain = DAG.getEntryNode();
+
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
@@ -158,7 +166,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
}
CLI.setDebugLoc(dl)
- .setChain(DAG.getEntryNode())
+ .setChain(InChain)
.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setNoReturn(CallOptions.DoesNotReturn)
.setDiscardResult(!CallOptions.IsReturnValueUsed)
@@ -277,6 +285,22 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
ISD::CondCode &CCCode,
const SDLoc &dl, const SDValue OldLHS,
const SDValue OldRHS) const {
+ SDValue Chain;
+ return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
+ OldRHS, Chain);
+}
+
+void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
+ SDValue &NewLHS, SDValue &NewRHS,
+ ISD::CondCode &CCCode,
+ const SDLoc &dl, const SDValue OldLHS,
+ const SDValue OldRHS,
+ SDValue &Chain,
+ bool IsSignaling) const {
+ // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
+ // not supporting it. We can update this code when libgcc provides such
+ // functions.
+
assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
&& "Unsupported setcc type!");
@@ -320,25 +344,18 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
(VT == MVT::f64) ? RTLIB::OGT_F64 :
(VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
break;
+ case ISD::SETO:
+ ShouldInvertCC = true;
+ LLVM_FALLTHROUGH;
case ISD::SETUO:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
(VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
break;
- case ISD::SETO:
- LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
- (VT == MVT::f64) ? RTLIB::O_F64 :
- (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
- break;
case ISD::SETONE:
- // SETONE = SETOLT | SETOGT
- LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
- (VT == MVT::f64) ? RTLIB::OLT_F64 :
- (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
- LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
- (VT == MVT::f64) ? RTLIB::OGT_F64 :
- (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
- break;
+ // SETONE = O && UNE
+ ShouldInvertCC = true;
+ LLVM_FALLTHROUGH;
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
@@ -382,24 +399,33 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
EVT OpsVT[2] = { OldLHS.getValueType(),
OldRHS.getValueType() };
CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
- NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
+ auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
+ NewLHS = Call.first;
NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1);
- if (ShouldInvertCC)
- CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
-
- if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
- SDValue Tmp = DAG.getNode(
- ISD::SETCC, dl,
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
- NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
- NewLHS = DAG.getNode(
- ISD::SETCC, dl,
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
- NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
- NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+ if (ShouldInvertCC) {
+ assert(RetVT.isInteger());
+ CCCode = getSetCCInverse(CCCode, RetVT);
+ }
+
+ if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
+ // Update Chain.
+ Chain = Call.second;
+ } else {
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
+ SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
+ auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
+ CCCode = getCmpLibcallCC(LC2);
+ if (ShouldInvertCC)
+ CCCode = getSetCCInverse(CCCode, RetVT);
+ NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
+ if (Chain)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
+ Call2.second);
+ NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
+ Tmp.getValueType(), Tmp, NewLHS);
NewRHS = SDValue();
}
}
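
As a worked instance of the inverted two-libcall path above (libcall names follow the usual libgcc soft-float ABI; sketch only):

extern "C" int __unorddf2(double, double); // nonzero iff unordered (UO)
extern "C" int __eqdf2(double, double);    // zero iff equal        (EQ)

bool setone(double a, double b) {
  int t1 = __unorddf2(a, b);
  int t2 = __eqdf2(a, b);
  // ShouldInvertCC inverts both predicates and combines with AND:
  // "ordered and not equal".
  return (t1 == 0) & (t2 != 0);
}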
@@ -693,6 +719,27 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
return Op.getOperand(1);
break;
}
+ case ISD::SETCC: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // If (1) we only need the sign-bit, (2) the setcc operands are the same
+ // width as the setcc result, and (3) the result of a setcc conforms to 0 or
+ // -1, we may be able to bypass the setcc.
+ if (DemandedBits.isSignMask() &&
+ Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() &&
+ getBooleanContents(Op0.getValueType()) ==
+ BooleanContent::ZeroOrNegativeOneBooleanContent) {
+ // If we're testing X < 0, then this compare isn't needed - just use X!
+ // FIXME: We're limiting to integer types here, but this should also work
+ // if we don't care about FP signed-zero. The use of SETLT with FP means
+ // that we don't care about NaNs.
+ if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
+ (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
+ return Op0;
+ }
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
// If none of the extended bits are demanded, eliminate the sextinreg.
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -1251,7 +1298,7 @@ bool TargetLowering::SimplifyDemandedBits(
// -1, we may be able to bypass the setcc.
if (DemandedBits.isSignMask() &&
Op0.getScalarValueSizeInBits() == BitWidth &&
- getBooleanContents(VT) ==
+ getBooleanContents(Op0.getValueType()) ==
BooleanContent::ZeroOrNegativeOneBooleanContent) {
// If we're testing X < 0, then this compare isn't needed - just use X!
// FIXME: We're limiting to integer types here, but this should also work
@@ -1538,6 +1585,16 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero = Known2.Zero.reverseBits();
break;
}
+ case ISD::BSWAP: {
+ SDValue Src = Op.getOperand(0);
+ APInt DemandedSrcBits = DemandedBits.byteSwap();
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ Known.One = Known2.One.byteSwap();
+ Known.Zero = Known2.Zero.byteSwap();
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -1753,15 +1810,11 @@ bool TargetLowering::SimplifyDemandedBits(
// undesirable.
break;
- auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
- if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth))
+ SDValue ShAmt = Src.getOperand(1);
+ auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt);
+ if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
break;
-
- SDValue Shift = Src.getOperand(1);
- uint64_t ShVal = ShAmt->getZExtValue();
-
- if (TLO.LegalTypes())
- Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
+ uint64_t ShVal = ShAmtC->getZExtValue();
APInt HighBits =
APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
@@ -1771,10 +1824,12 @@ bool TargetLowering::SimplifyDemandedBits(
if (!(HighBits & DemandedBits)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
+ if (TLO.LegalTypes())
+ ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
SDValue NewTrunc =
TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
return TLO.CombineTo(
- Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt));
}
break;
}
@@ -1818,6 +1873,17 @@ bool TargetLowering::SimplifyDemandedBits(
Depth + 1))
return true;
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedSrcBits.isAllOnesValue() ||
+ !DemandedSrcElts.isAllOnesValue()) {
+ if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
Known = Known2;
if (BitWidth > EltBitWidth)
Known = Known.zext(BitWidth, false /* => any extend */);
@@ -2808,7 +2874,8 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
// Note that where Y is variable and is known to have at most one bit set
// (for example, if it is Z & 1) we cannot do this; the expressions are not
// equivalent when Y == 0.
- Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+ assert(OpVT.isInteger());
+ Cond = ISD::getSetCCInverse(Cond, OpVT);
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(Cond, N0.getSimpleValueType()))
return DAG.getSetCC(DL, VT, N0, Zero, Cond);
@@ -2897,7 +2964,8 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
// What if we invert constants? (and the target predicate)
I1.negate();
I01.negate();
- NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
+ assert(XVT.isInteger());
+ NewCond = getSetCCInverse(NewCond, XVT);
if (!checkConstants())
return SDValue();
// Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
@@ -3052,6 +3120,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAGCombinerInfo &DCI,
const SDLoc &dl) const {
SelectionDAG &DAG = DCI.DAG;
+ const DataLayout &Layout = DAG.getDataLayout();
EVT OpVT = N0.getValueType();
// Constant fold or commute setcc.
@@ -3132,7 +3201,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
SDValue Zero = DAG.getConstant(0, dl, CTVT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
- ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true);
+ assert(CTVT.isInteger());
+ ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
@@ -3223,7 +3293,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode InvCond = ISD::getSetCCInverse(
cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
- TopSetCC.getOperand(0).getValueType().isInteger());
+ TopSetCC.getOperand(0).getValueType());
return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
TopSetCC.getOperand(1),
InvCond);
@@ -3256,7 +3326,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
if (Mask.isSubsetOf(newMask)) {
- if (DAG.getDataLayout().isLittleEndian())
+ if (Layout.isLittleEndian())
bestOffset = (uint64_t)offset * (width/8);
else
bestOffset = (origWidth/width - offset - 1) * (width/8);
@@ -3272,11 +3342,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
if (newVT.isRound() &&
shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
- EVT PtrType = Lod->getOperand(1).getValueType();
SDValue Ptr = Lod->getBasePtr();
if (bestOffset != 0)
- Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
- DAG.getConstant(bestOffset, dl, PtrType));
+ Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl);
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
SDValue NewLoad = DAG.getLoad(
newVT, dl, Lod->getChain(), Ptr,
@@ -3332,8 +3400,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (DCI.isBeforeLegalizeOps() ||
(isOperationLegal(ISD::SETCC, newVT) &&
isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
- EVT NewSetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
+ EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
@@ -3379,14 +3446,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
- isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
+ isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
+ (N0.getValueType() == MVT::i1 ||
+ getBooleanContents(N0.getOperand(0).getValueType()) ==
+ ZeroOrOneBooleanContent)) {
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
if (TrueWhenTrue)
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- CC = ISD::getSetCCInverse(CC,
- N0.getOperand(0).getValueType().isInteger());
+ CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
@@ -3420,10 +3489,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, Val, N1,
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
- } else if (N1C->isOne() &&
- (VT == MVT::i1 ||
- getBooleanContents(N0->getValueType(0)) ==
- ZeroOrOneBooleanContent)) {
+ } else if (N1C->isOne()) {
SDValue Op0 = N0;
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
@@ -3431,10 +3497,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if ((Op0.getOpcode() == ISD::XOR) &&
Op0.getOperand(0).getOpcode() == ISD::SETCC &&
Op0.getOperand(1).getOpcode() == ISD::SETCC) {
- // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
- Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
- return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
- Cond);
+ SDValue XorLHS = Op0.getOperand(0);
+ SDValue XorRHS = Op0.getOperand(1);
+ // Ensure that the input setccs return an i1 type or 0/1 value.
+ if (Op0.getValueType() == MVT::i1 ||
+ (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
+ ZeroOrOneBooleanContent &&
+ getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
+ ZeroOrOneBooleanContent)) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
+ }
}
if (Op0.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(Op0.getOperand(1)) &&
@@ -3611,14 +3685,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
(VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
N0.getOpcode() == ISD::AND) {
- auto &DL = DAG.getDataLayout();
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize());
+ EVT ShiftTy =
+ getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
unsigned ShCt = AndRHS->getAPIntValue().logBase2();
if (AndRHS->getAPIntValue().isPowerOf2() &&
- ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
+ !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShCt, dl, ShiftTy)));
@@ -3628,7 +3702,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Perform the xform if C1 is a single bit.
unsigned ShCt = C1.logBase2();
if (C1.isPowerOf2() &&
- ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
+ !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShCt, dl, ShiftTy)));
@@ -3639,6 +3713,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (C1.getMinSignedBits() <= 64 &&
!isLegalICmpImmediate(C1.getSExtValue())) {
+ EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
// (X & -256) == 256 -> (X >> 8) == 1
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
@@ -3646,15 +3721,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
const APInt &AndRHSC = AndRHS->getAPIntValue();
if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
- auto &DL = DAG.getDataLayout();
- EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
- !DCI.isBeforeLegalize());
- EVT CmpTy = N0.getValueType();
- SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
- DAG.getConstant(ShiftBits, dl,
- ShiftTy));
- SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
- return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
+ SDValue Shift =
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, dl, ShiftTy));
+ SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
+ return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
+ }
}
}
} else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
@@ -3676,14 +3749,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
NewC.lshrInPlace(ShiftBits);
if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
- isLegalICmpImmediate(NewC.getSExtValue())) {
- auto &DL = DAG.getDataLayout();
- EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
- !DCI.isBeforeLegalize());
- EVT CmpTy = N0.getValueType();
- SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
+ isLegalICmpImmediate(NewC.getSExtValue()) &&
+ !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
DAG.getConstant(ShiftBits, dl, ShiftTy));
- SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
+ SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
}
}
@@ -4480,6 +4550,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
TargetLowering::ConstraintType CType =
TLI.getConstraintType(OpInfo.Codes[i]);
+ // Indirect 'other' or 'immediate' constraints are not allowed.
+ if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
+ CType == TargetLowering::C_Register ||
+ CType == TargetLowering::C_RegisterClass))
+ continue;
+
// If this is an 'other' or 'immediate' constraint, see if the operand is
// valid for it. For example, on X86 we might have an 'rI' constraint. If
// the operand is an integer in the range [0..31] we want to use I (saving a
@@ -4905,7 +4981,7 @@ SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
ISD::CondCode Cond,
DAGCombinerInfo &DCI,
const SDLoc &DL) const {
- SmallVector<SDNode *, 2> Built;
+ SmallVector<SDNode *, 5> Built;
if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
DCI, DL, Built)) {
for (SDNode *N : Built)
@@ -4940,26 +5016,44 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
if (!isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
- // TODO: Could support comparing with non-zero too.
- ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
- if (!CompTarget || !CompTarget->isNullValue())
- return SDValue();
-
- bool HadOneDivisor = false;
- bool AllDivisorsAreOnes = true;
+ bool ComparingWithAllZeros = true;
+ bool AllComparisonsWithNonZerosAreTautological = true;
+ bool HadTautologicalLanes = false;
+ bool AllLanesAreTautological = true;
bool HadEvenDivisor = false;
bool AllDivisorsArePowerOfTwo = true;
- SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
+ bool HadTautologicalInvertedLanes = false;
+ SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
- auto BuildUREMPattern = [&](ConstantSDNode *C) {
+ auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (C->isNullValue())
+ if (CDiv->isNullValue())
return false;
- const APInt &D = C->getAPIntValue();
- // If all divisors are ones, we will prefer to avoid the fold.
- HadOneDivisor |= D.isOneValue();
- AllDivisorsAreOnes &= D.isOneValue();
+ const APInt &D = CDiv->getAPIntValue();
+ const APInt &Cmp = CCmp->getAPIntValue();
+
+ ComparingWithAllZeros &= Cmp.isNullValue();
+
+ // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
+ // if C2 is not less than C1, the comparison is always false.
+ // But we will only be able to produce the comparison that will give the
+ // opposite tautological answer. So this lane would need to be fixed up.
+ bool TautologicalInvertedLane = D.ule(Cmp);
+ HadTautologicalInvertedLanes |= TautologicalInvertedLane;
+
+ // If all lanes are tautological (either all divisors are ones, or divisor
+ // is not greater than the constant we are comparing with),
+ // we will prefer to avoid the fold.
+ bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
+ HadTautologicalLanes |= TautologicalLane;
+ AllLanesAreTautological &= TautologicalLane;
+
+ // If we are comparing with non-zero, we'll need to subtract said
+ // comparison value from the LHS. But there is no point in doing that if
+ // every lane where we are comparing with non-zero is tautological.
+ if (!Cmp.isNullValue())
+ AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
@@ -4981,19 +5075,27 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
- // Q = floor((2^W - 1) / D)
- APInt Q = APInt::getAllOnesValue(W).udiv(D);
+ // Q = floor((2^W - 1) u/ D)
+ // R = ((2^W - 1) u% D)
+ APInt Q, R;
+ APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
+
+ // If we are comparing with zero, then that comparison constant is okay,
+ // else it may need to be one less than that.
+ if (Cmp.ugt(R))
+ Q -= 1;
assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
- // If the divisor is 1 the result can be constant-folded.
- if (D.isOneValue()) {
+ // If the lane is tautological the result can be constant-folded.
+ if (TautologicalLane) {
// Set P and K amounts to bogus values so we can try to splat them.
P = 0;
K = -1;
- assert(Q.isAllOnesValue() &&
- "Expecting all-ones comparison for one divisor");
+ // And ensure that the comparison constant is tautological;
+ // it will always compare true/false.
+ Q = -1;
}
PAmts.push_back(DAG.getConstant(P, DL, SVT));
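
To make these constants concrete, here is a minimal standalone check (plain C++, not LLVM code; P, K, and Q mirror the variables above) of the fold for W = 32 and D = 5. P is the multiplicative inverse of 5 mod 2^32, Q = floor((2^32 - 1) u/ 5), and since 5 is odd, K = 0 and no rotate is needed. For the non-zero comparison target, Q drops by one via the adjustment above, because Cmp = 3 u> R = (2^32 - 1) u% 5 = 0.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t P = 0xCCCCCCCDu; // 5 * P == 1 (mod 2^32)
  const uint32_t Q = 0x33333333u; // floor((2^32 - 1) / 5)
  for (uint32_t X = 0; X < 1000000; ++X) {
    // x u% 5 == 0  <=>  x * P u<= Q
    assert((X % 5 == 0) == (X * P <= Q));
    // x u% 5 == 3  <=>  (x - 3) * P u<= Q - 1  (Q adjusted, as 3 u> R)
    assert((X % 5 == 3) == ((X - 3) * P <= Q - 1));
  }
  return 0;
}
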
@@ -5007,11 +5109,11 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue D = REMNode.getOperand(1);
// Collect the values from each element.
- if (!ISD::matchUnaryPredicate(D, BuildUREMPattern))
+ if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
return SDValue();
- // If this is a urem by a one, avoid the fold since it can be constant-folded.
- if (AllDivisorsAreOnes)
+ // If all lanes are tautological, the result can be constant-folded.
+ if (AllLanesAreTautological)
return SDValue();
// If this is a urem by a powers-of-two, avoid the fold since it can be
@@ -5021,7 +5123,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue PVal, KVal, QVal;
if (VT.isVector()) {
- if (HadOneDivisor) {
+ if (HadTautologicalLanes) {
// Try to turn PAmts into a splat, since we don't care about the values
// that are currently '0'. If we can't, just keep '0's.
turnVectorIntoSplatVector(PAmts, isNullConstant);
@@ -5040,6 +5142,14 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
QVal = QAmts[0];
}
+ if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
+ if (!isOperationLegalOrCustom(ISD::SUB, VT))
+ return SDValue(); // FIXME: Could/should use `ISD::ADD`?
+ assert(CompTargetNode.getValueType() == N.getValueType() &&
+ "Expecting that the types on LHS and RHS of comparisons match.");
+ N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
+ }
+
// (mul N, P)
SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
Created.push_back(Op0.getNode());
@@ -5058,8 +5168,41 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
}
// UREM: (setule/setugt (rotr (mul N, P), K), Q)
- return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
- ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+ SDValue NewCC =
+ DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+ if (!HadTautologicalInvertedLanes)
+ return NewCC;
+
+ // If any lanes previously compared always-false, the NewCC will give an
+ // always-true result for them, so we need to fix up those lanes.
+ // Or the other way around for the inequality predicate.
+ assert(VT.isVector() && "Can/should only get here for vectors.");
+ Created.push_back(NewCC.getNode());
+
+ // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
+ // if C2 is not less than C1, the comparison is always false.
+ // But we have produced the comparison that will give the
+ // opposite tautological answer. So these lanes would need to be fixed up.
+ SDValue TautologicalInvertedChannels =
+ DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
+ Created.push_back(TautologicalInvertedChannels.getNode());
+
+ if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
+ // If we have a vector select, let's replace the comparison results in the
+ // affected lanes with the correct tautological result.
+ SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
+ DL, SETCCVT, SETCCVT);
+ return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
+ Replacement, NewCC);
+ }
+
+ // Else, we can just invert the comparison result in the appropriate lanes.
+ if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
+ return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
+ TautologicalInvertedChannels);
+
+ return SDValue(); // Don't know how to lower.
}
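
A hedged toy model of this lane fixup (standalone C++, with a made-up single lane D = 3, C2 = 7): for a lane whose divisor is not greater than the comparison target, the rewritten compare is always true because Q was planted as all-ones, and XOR'ing with the SETULE(D, CompTarget) mask restores the correct always-false answer.

#include <cassert>
#include <cstdint>

int main() {
  // `x u% 3 == 7` is tautologically false, since x u% 3 < 3 u<= 7.
  const uint32_t P = 0xAAAAAAABu;  // 3 * P == 1 (mod 2^32)
  const uint32_t Q = ~0u;          // planted all-ones comparison constant
  const bool LaneMask = 3u <= 7u;  // TautologicalInvertedChannels lane
  for (uint32_t X = 0; X < 100000; ++X) {
    bool Rewritten = X * P <= Q;   // always true, since Q is all-ones
    assert((Rewritten ^ LaneMask) == (X % 3 == 7));
  }
  return 0;
}
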
/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
@@ -5544,7 +5687,14 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
ForCodeSize, Depth + 1);
char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
ForCodeSize, Depth + 1);
- if (V0 >= V1) {
+ // TODO: This is a hack. It is possible that costs have changed between now
+ // and the initial calls to isNegatibleForFree(). That is because we
+ // are rewriting the expression, and that may change the number of
+ // uses (and therefore the cost) of values. If the negation costs are
+ // equal, only negate this value if it is a constant. Otherwise, try
+ // operand 1. A better fix would eliminate uses as a cost factor or
+ // track the change in uses as we rewrite the expression.
+ if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) {
// fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
SDValue Neg0 = getNegatedExpression(
Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
@@ -5954,6 +6104,8 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
EVT DstVT = Node->getValueType(0);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+ EVT DstSetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
// Only expand vector types if we have the appropriate vector bit operations.
unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
@@ -5980,7 +6132,15 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
}
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
- SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
+ SDValue Sel;
+
+ if (Node->isStrictFPOpcode()) {
+ Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
+ Node->getOperand(0), /*IsSignaling*/ true);
+ Chain = Sel.getValue(1);
+ } else {
+ Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
+ }
bool Strict = Node->isStrictFPOpcode() ||
shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
@@ -5989,28 +6149,29 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
// Expand based on maximum range of FP_TO_SINT, if the value exceeds the
// signmask then offset (the result of which should be fully representable).
// Sel = Src < 0x8000000000000000
- // Val = select Sel, Src, Src - 0x8000000000000000
- // Ofs = select Sel, 0, 0x8000000000000000
- // Result = fp_to_sint(Val) ^ Ofs
+ // FltOfs = select Sel, 0, 0x8000000000000000
+ // IntOfs = select Sel, 0, 0x8000000000000000
+ // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
// TODO: Should any fast-math-flags be set for the FSUB?
- SDValue SrcBiased;
- if (Node->isStrictFPOpcode())
- SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
- { Node->getOperand(0), Src, Cst });
- else
- SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst);
- SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased);
- SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
- DAG.getConstant(SignMask, dl, DstVT));
+ SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
+ DAG.getConstantFP(0.0, dl, SrcVT), Cst);
+ Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
+ SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
+ DAG.getConstant(0, dl, DstVT),
+ DAG.getConstant(SignMask, dl, DstVT));
SDValue SInt;
if (Node->isStrictFPOpcode()) {
+ SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
+ { Chain, Src, FltOfs });
SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
- { SrcBiased.getValue(1), Val });
+ { Val.getValue(1), Val });
Chain = SInt.getValue(1);
- } else
+ } else {
+ SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
- Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs);
+ }
+ Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
} else {
// Expand based on maximum range of FP_TO_SINT:
// True = fp_to_sint(Src)
@@ -6023,14 +6184,17 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
False = DAG.getNode(ISD::XOR, dl, DstVT, False,
DAG.getConstant(SignMask, dl, DstVT));
+ Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
Result = DAG.getSelect(dl, DstVT, Sel, True, False);
}
return true;
}
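
For illustration, a hedged scalar model (plain C++, not the DAG form, and ignoring the strict-FP chain) of the first expansion branch above for double -> uint64_t, following the Sel/FltOfs/IntOfs pseudocode in the comment:

#include <cstdint>

uint64_t fpToUint64(double Src) {
  const double Cst = 9223372036854775808.0;  // 2^63, the FP_TO_SINT bound
  bool Sel = Src < Cst;                      // Sel = Src < 0x8000000000000000
  double FltOfs = Sel ? 0.0 : Cst;           // FltOfs = select Sel, 0.0, 2^63
  uint64_t IntOfs = Sel ? 0 : 0x8000000000000000ull;
  // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
  return (uint64_t)(int64_t)(Src - FltOfs) ^ IntOfs;
}
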
bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
+ SDValue &Chain,
SelectionDAG &DAG) const {
- SDValue Src = Node->getOperand(0);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@@ -6052,17 +6216,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
return false;
// For unsigned conversions, convert them to signed conversions using the
- // algorithm from the x86_64 __floatundidf in compiler_rt.
- SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
-
- SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
- SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
- SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
- SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
- SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
-
- SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
- SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+ // algorithm from the x86_64 __floatundisf in compiler_rt.
// TODO: This really should be implemented using a branch rather than a
// select. We happen to get lucky and machinesink does the right
@@ -6073,6 +6227,37 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue SignBitTest = DAG.getSetCC(
dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
+
+ SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
+ SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
+
+ SDValue Slow, Fast;
+ if (Node->isStrictFPOpcode()) {
+ // In strict mode, we must avoid spurious exceptions, and therefore
+ // must make sure to only emit a single STRICT_SINT_TO_FP.
+ SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src);
+ Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other },
+ { Node->getOperand(0), InCvt });
+ Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other },
+ { Fast.getValue(1), Fast, Fast });
+ Chain = Slow.getValue(1);
+ // The STRICT_SINT_TO_FP inherits the exception mode from the
+ // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can
+ // never raise any exception.
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept());
+ Fast->setFlags(Flags);
+ Flags.setNoFPExcept(true);
+ Slow->setFlags(Flags);
+ } else {
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
+ Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+ Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
+ }
+
Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
return true;
}
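
A hedged scalar rendering of this path (plain C++; the non-strict branch only): when the sign bit of the source is set, halve it while folding the lost bit back in as a rounding sticky bit, convert as signed, then double the result.

#include <cstdint>

float uintToFloat(uint64_t Src) {
  bool SignBitTest = (int64_t)Src < 0;  // Src u>= 2^63?
  if (!SignBitTest)
    return (float)(int64_t)Src;         // Fast: plain signed conversion
  uint64_t Shr = Src >> 1;
  uint64_t And = Src & 1;               // sticky bit preserves rounding
  uint64_t Or = Shr | And;
  float SignCvt = (float)(int64_t)Or;   // now fits in a signed convert
  return SignCvt + SignCvt;             // Slow: double the halved value
}
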
@@ -6105,8 +6290,18 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
- SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
- Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ if (Node->isStrictFPOpcode()) {
+ SDValue HiSub =
+ DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
+ {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52});
+ Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
+ {HiSub.getValue(1), LoFlt, HiSub});
+ Chain = Result.getValue(1);
+ } else {
+ SDValue HiSub =
+ DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
+ Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+ }
return true;
}
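
A hedged scalar model of this bit-pattern expansion (standalone C++; std::memcpy stands in for the DAG bitcasts): OR'ing each 32-bit half with the exponent pattern of 2^52 or 2^84 turns it into an exact double, and one FSUB plus one FADD recombine the halves.

#include <cstdint>
#include <cstring>

double uint64ToDouble(uint64_t Src) {
  uint64_t LoOr = (Src & 0xFFFFFFFFull) | 0x4330000000000000ull; // Lo | 2^52
  uint64_t HiOr = (Src >> 32)           | 0x4530000000000000ull; // Hi | 2^84
  double LoFlt, HiFlt;
  std::memcpy(&LoFlt, &LoOr, sizeof(double)); // bitcast: LoFlt = 2^52 + Lo
  std::memcpy(&HiFlt, &HiOr, sizeof(double)); // HiFlt = 2^84 + Hi * 2^32
  const double TwoP84PlusTwoP52 = 19342813118337666422669312.0; // 2^84 + 2^52
  double HiSub = HiFlt - TwoP84PlusTwoP52;    // = Hi * 2^32 - 2^52, exact
  return LoFlt + HiSub;                       // = Hi * 2^32 + Lo, one rounding
}
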
@@ -6150,6 +6345,26 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
}
}
+ // If none of the above worked, but there are no NaNs, then expand to
+ // a compare/select sequence. This is required for correctness since
+ // InstCombine might have canonicalized a fcmp+select sequence to a
+ // FMINNUM/FMAXNUM node. If we were to fall through to the default
+ // expansion to libcall, we might introduce a link-time dependency
+ // on libm into a file that originally did not have one.
+ if (Node->getFlags().hasNoNaNs()) {
+ ISD::CondCode Pred =
+ Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
+ SDValue Op1 = Node->getOperand(0);
+ SDValue Op2 = Node->getOperand(1);
+ SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
+ // Copy FMF flags, but always set the no-signed-zeros flag
+ // as this is implied by the FMINNUM/FMAXNUM semantics.
+ SDNodeFlags Flags = Node->getFlags();
+ Flags.setNoSignedZeros(true);
+ SelCC->setFlags(Flags);
+ return SelCC;
+ }
+
return SDValue();
}
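
Under the no-NaNs guard this relies on, the node collapses to an ordinary compare+select; hedged scalar equivalents:

// Valid only when neither operand can be a NaN, matching hasNoNaNs().
float fminnumNoNaNs(float Op1, float Op2) {
  return Op1 < Op2 ? Op1 : Op2; // Pred = SETLT for FMINNUM
}
float fmaxnumNoNaNs(float Op1, float Op2) {
  return Op1 > Op2 ? Op1 : Op2; // Pred = SETGT for FMAXNUM
}
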
@@ -6342,8 +6557,9 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
return true;
}
-SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
- SelectionDAG &DAG) const {
+std::pair<SDValue, SDValue>
+TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
+ SelectionDAG &DAG) const {
SDLoc SL(LD);
SDValue Chain = LD->getChain();
SDValue BasePTR = LD->getBasePtr();
@@ -6377,7 +6593,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
- return DAG.getMergeValues({Value, NewChain}, SL);
+ return std::make_pair(Value, NewChain);
}
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
@@ -6471,10 +6687,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
LoadedVT.isVector()) {
// Scalarize the load and let the individual components be handled.
- SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
- if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
- return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
- return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
+ return scalarizeVectorLoad(LD, DAG);
}
// Expand to a (misaligned) integer load of the same size,
@@ -6807,7 +7020,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
- return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
+ return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
//===----------------------------------------------------------------------===//
@@ -7096,6 +7309,86 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
return Result;
}
+SDValue
+TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
+ SDValue LHS, SDValue RHS,
+ unsigned Scale, SelectionDAG &DAG) const {
+ assert((Opcode == ISD::SDIVFIX ||
+ Opcode == ISD::UDIVFIX) &&
+ "Expected a fixed point division opcode");
+
+ EVT VT = LHS.getValueType();
+ bool Signed = Opcode == ISD::SDIVFIX;
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ // If there is enough room in the type to upscale the LHS or downscale the
+ // RHS before the division, we can perform it in this type without having to
+ // resize. For signed operations, the LHS headroom is the number of
+ // redundant sign bits, and for unsigned ones it is the number of leading
+ // zeroes. The headroom for the RHS is the number of trailing zeroes.
+ unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
+ : DAG.computeKnownBits(LHS).countMinLeadingZeros();
+ unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
+
+ if (LHSLead + RHSTrail < Scale)
+ return SDValue();
+
+ unsigned LHSShift = std::min(LHSLead, Scale);
+ unsigned RHSShift = Scale - LHSShift;
+
+ // At this point, we know that if we shift the LHS up by LHSShift and the
+ // RHS down by RHSShift, we can emit a regular division with a final scaling
+ // factor of Scale.
+
+ EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ if (LHSShift)
+ LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
+ DAG.getConstant(LHSShift, dl, ShiftTy));
+ if (RHSShift)
+ RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
+ DAG.getConstant(RHSShift, dl, ShiftTy));
+
+ SDValue Quot;
+ if (Signed) {
+ // For signed operations, if the resulting quotient is negative and the
+ // remainder is nonzero, subtract 1 from the quotient to round towards
+ // negative infinity.
+ SDValue Rem;
+ // FIXME: Ideally we would always produce an SDIVREM here, but if the
+ // type isn't legal, SDIVREM cannot be expanded. There is no reason why
+ // we couldn't just form a libcall, but the type legalizer doesn't do it.
+ if (isTypeLegal(VT) &&
+ isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
+ Quot = DAG.getNode(ISD::SDIVREM, dl,
+ DAG.getVTList(VT, VT),
+ LHS, RHS);
+ Rem = Quot.getValue(1);
+ Quot = Quot.getValue(0);
+ } else {
+ Quot = DAG.getNode(ISD::SDIV, dl, VT,
+ LHS, RHS);
+ Rem = DAG.getNode(ISD::SREM, dl, VT,
+ LHS, RHS);
+ }
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
+ SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
+ SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
+ SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
+ SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
+ DAG.getConstant(1, dl, VT));
+ Quot = DAG.getSelect(dl, VT,
+ DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
+ Sub1, Quot);
+ } else
+ Quot = DAG.getNode(ISD::UDIV, dl, VT,
+ LHS, RHS);
+
+ // TODO: Saturation.
+
+ return Quot;
+}
+
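
A hedged standalone model of the signed path above (plain C++, 32-bit, assuming an arithmetic right shift and that the caller has already verified LHSLead + RHSTrail >= Scale, as the expansion does):

#include <algorithm>
#include <cstdint>

int32_t fixedPointSDiv(int32_t LHS, int32_t RHS, unsigned Scale,
                       unsigned LHSLead, unsigned RHSTrail) {
  // Use up the LHS headroom first, then shift the RHS down by the rest.
  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;       // <= RHSTrail by precondition
  LHS = (int32_t)((uint32_t)LHS << LHSShift); // headroom => no overflow
  RHS >>= RHSShift;                           // exact: trailing zeros only
  int32_t Quot = LHS / RHS;
  int32_t Rem = LHS % RHS;
  // C division truncates towards zero; round towards negative infinity
  // instead when the remainder is nonzero and the operand signs differ.
  if (Rem != 0 && ((LHS < 0) != (RHS < 0)))
    --Quot;
  return Quot;
}
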
void TargetLowering::expandUADDSUBO(
SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
SDLoc dl(Node);
diff --git a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 17a4d76c4c80..45427dc41e6e 100644
--- a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index 412a00095b9b..85dd4f59fa13 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -73,6 +73,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index db520d4e6403..4abf9ea41b65 100644
--- a/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -24,9 +23,11 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
#define DEBUG_TYPE "sjljehprepare"
@@ -175,9 +176,9 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// that needs to be restored on all exits from the function. This is an alloca
// because the value needs to be added to the global context list.
auto &DL = F.getParent()->getDataLayout();
- unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy);
- FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(),
- nullptr, Align, "fn_context", &EntryBB->front());
+ const Align Alignment(DL.getPrefTypeAlignment(FunctionContextTy));
+ FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(), nullptr,
+ Alignment, "fn_context", &EntryBB->front());
// Fill in the function context structure.
for (LandingPadInst *LPI : LPads) {
diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp
index 9fff873324d0..6664b58eccf8 100644
--- a/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -18,6 +19,16 @@ using namespace llvm;
#define DEBUG_TYPE "slotindexes"
char SlotIndexes::ID = 0;
+
+SlotIndexes::SlotIndexes() : MachineFunctionPass(ID), mf(nullptr) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+}
+
+SlotIndexes::~SlotIndexes() {
+ // The indexList's nodes are all allocated in the BumpPtrAllocator.
+ indexList.clearAndLeakNodesUnsafely();
+}
+
INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE,
"Slot index numbering", false, false)
diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp
index 11452fdb747a..36a0ddf67b19 100644
--- a/llvm/lib/CodeGen/SpillPlacement.cpp
+++ b/llvm/lib/CodeGen/SpillPlacement.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include <algorithm>
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 641b54205d62..b6e81116286f 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -48,6 +48,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -1003,7 +1004,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// zone are okay, despite the fact that we don't have a good way
// for validating all of the usages of the calculation.
#ifndef NDEBUG
- bool TouchesMemory = I.mayLoad() || I.mayStore();
+ bool TouchesMemory = I.mayLoadOrStore();
// If we *don't* protect the user from escaped allocas, don't bother
// validating the instructions.
if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
diff --git a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index fb2abf3daa7f..5ccfacfc26dc 100644
--- a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp
index 383c91259ffc..e16587c44a55 100644
--- a/llvm/lib/CodeGen/StackMaps.cpp
+++ b/llvm/lib/CodeGen/StackMaps.cpp
@@ -260,7 +260,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
// Create a LiveOutReg for each bit that is set in the register mask.
for (unsigned Reg = 0, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg)
- if ((Mask[Reg / 32] >> Reg % 32) & 1)
+ if ((Mask[Reg / 32] >> (Reg % 32)) & 1)
LiveOuts.push_back(createLiveOutReg(Reg, TRI));
// We don't need to keep track of a register if its super-register is already
@@ -294,14 +294,13 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
return LiveOuts;
}
-void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
+void StackMaps::recordStackMapOpers(const MCSymbol &MILabel,
+ const MachineInstr &MI, uint64_t ID,
MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
bool recordResult) {
MCContext &OutContext = AP.OutStreamer->getContext();
- MCSymbol *MILabel = OutContext.createTempSymbol();
- AP.OutStreamer->EmitLabel(MILabel);
-
+
LocationVec Locations;
LiveOutVec LiveOuts;
@@ -340,7 +339,7 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
// Create an expression to calculate the offset of the callsite from function
// entry.
const MCExpr *CSOffsetExpr = MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(MILabel, OutContext),
+ MCSymbolRefExpr::create(&MILabel, OutContext),
MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext);
CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations),
@@ -360,22 +359,23 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
FnInfos.insert(std::make_pair(AP.CurrentFnSym, FunctionInfo(FrameSize)));
}
-void StackMaps::recordStackMap(const MachineInstr &MI) {
+void StackMaps::recordStackMap(const MCSymbol &L, const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap");
StackMapOpers opers(&MI);
const int64_t ID = MI.getOperand(PatchPointOpers::IDPos).getImm();
- recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), opers.getVarIdx()),
+ recordStackMapOpers(L, MI, ID, std::next(MI.operands_begin(),
+ opers.getVarIdx()),
MI.operands_end());
}
-void StackMaps::recordPatchPoint(const MachineInstr &MI) {
+void StackMaps::recordPatchPoint(const MCSymbol &L, const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint");
PatchPointOpers opers(&MI);
const int64_t ID = opers.getID();
auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx());
- recordStackMapOpers(MI, ID, MOI, MI.operands_end(),
+ recordStackMapOpers(L, MI, ID, MOI, MI.operands_end(),
opers.isAnyReg() && opers.hasDef());
#ifndef NDEBUG
@@ -390,14 +390,14 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
#endif
}
-void StackMaps::recordStatepoint(const MachineInstr &MI) {
+void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint");
StatepointOpers opers(&MI);
// Record all the deopt and gc operands (they're contiguous and run from the
// initial index to the end of the operand list)
const unsigned StartIdx = opers.getVarIdx();
- recordStackMapOpers(MI, opers.getID(), MI.operands_begin() + StartIdx,
+ recordStackMapOpers(L, MI, opers.getID(), MI.operands_begin() + StartIdx,
MI.operands_end(), false);
}
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index 5683d1db473c..4e2189884bb1 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -41,6 +41,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -61,6 +62,10 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
char StackProtector::ID = 0;
+StackProtector::StackProtector() : FunctionPass(ID), SSPBufferSize(8) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+}
+
INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE,
"Insert stack protectors", false, true)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index 9c8143c55dc2..7ae758323280 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index 83acf7f80715..c2cd8fa0324e 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -42,7 +42,9 @@ SwitchCG::getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
const SwitchInst *SI,
- MachineBasicBlock *DefaultMBB) {
+ MachineBasicBlock *DefaultMBB,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) {
#ifndef NDEBUG
// Clusters must be non-empty, sorted, and only contain Range clusters.
assert(!Clusters.empty());
@@ -80,7 +82,7 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
assert(Range >= NumCases);
// Cheap case: the whole range may be suitable for jump table.
- if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) {
+ if (TLI->isSuitableForJumpTable(SI, NumCases, Range, PSI, BFI)) {
CaseCluster JTCluster;
if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
Clusters[0] = JTCluster;
@@ -138,7 +140,7 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
assert(NumCases < UINT64_MAX / 100);
assert(Range >= NumCases);
- if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) {
+ if (TLI->isSuitableForJumpTable(SI, NumCases, Range, PSI, BFI)) {
unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
int64_t NumEntries = j - i + 1;
diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp
index ba348b4a9d41..648bf48b7d17 100644
--- a/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/llvm/lib/CodeGen/TailDuplication.cpp
@@ -12,12 +12,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -37,6 +40,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -55,6 +60,11 @@ public:
EarlyTailDuplicate() : TailDuplicateBase(ID, true) {
initializeEarlyTailDuplicatePass(*PassRegistry::getPassRegistry());
}
+
+ MachineFunctionProperties getClearedProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::NoPHIs);
+ }
};
} // end anonymous namespace
@@ -74,7 +84,11 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
return false;
auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- Duplicator.initMF(MF, PreRegAlloc, MBPI, /*LayoutMode=*/false);
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
+ nullptr;
+ Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false);
bool MadeChange = false;
while (Duplicator.tailDuplicateBlocks())
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 03c68a37e459..cd1278fd4d8d 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -19,13 +19,16 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -77,6 +80,8 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPIin,
+ const MachineBlockFrequencyInfo *MBFIin,
+ ProfileSummaryInfo *PSIin,
bool LayoutModeIn, unsigned TailDupSizeIn) {
MF = &MFin;
TII = MF->getSubtarget().getInstrInfo();
@@ -84,6 +89,8 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
MRI = &MF->getRegInfo();
MMI = &MF->getMMI();
MBPI = MBPIin;
+ MBFI = MBFIin;
+ PSI = PSIin;
TailDupSize = TailDupSizeIn;
assert(MBPI != nullptr && "Machine Branch Probability Info required");
@@ -555,14 +562,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
unsigned MaxDuplicateCount;
- if (TailDupSize == 0 &&
- TailDuplicateSize.getNumOccurrences() == 0 &&
- MF->getFunction().hasOptSize())
- MaxDuplicateCount = 1;
- else if (TailDupSize == 0)
+ bool OptForSize = MF->getFunction().hasOptSize() ||
+ llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI);
+ if (TailDupSize == 0)
MaxDuplicateCount = TailDuplicateSize;
else
MaxDuplicateCount = TailDupSize;
+ if (OptForSize)
+ MaxDuplicateCount = 1;
// If the block to be duplicated ends in an unanalyzable fallthrough, don't
// duplicate it.
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 9eeacc2584cb..bc59be890c97 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -60,6 +60,19 @@ bool TargetFrameLowering::needsFrameIndexResolution(
return MF.getFrameInfo().hasStackObjects();
}
+void TargetFrameLowering::getCalleeSaves(const MachineFunction &MF,
+ BitVector &CalleeSaves) const {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ CalleeSaves.resize(TRI.getNumRegs());
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid())
+ return;
+
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ CalleeSaves.set(Info.getReg());
+}
+
void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 6cae3b869501..a98c627dab09 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -1015,19 +1016,16 @@ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
}
// Default implementation of CreateTargetMIHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfo::
-CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
- const ScheduleDAG *DAG) const {
- return (ScheduleHazardRecognizer *)
- new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler");
+ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetMIHazardRecognizer(
+ const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
+ return new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler");
}
// Default implementation of CreateTargetPostRAHazardRecognizer.
ScheduleHazardRecognizer *TargetInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
- return (ScheduleHazardRecognizer *)
- new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
+ return new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
//===----------------------------------------------------------------------===//
@@ -1121,18 +1119,64 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
}
Optional<ParamLoadedValue>
-TargetInstrInfo::describeLoadedValue(const MachineInstr &MI) const {
+TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
+ Register Reg) const {
const MachineFunction *MF = MI.getMF();
- const MachineOperand *Op = nullptr;
- DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});;
- const MachineOperand *SrcRegOp, *DestRegOp;
-
- if (isCopyInstr(MI, SrcRegOp, DestRegOp)) {
- Op = SrcRegOp;
- return ParamLoadedValue(*Op, Expr);
- } else if (MI.isMoveImmediate()) {
- Op = &MI.getOperand(1);
- return ParamLoadedValue(*Op, Expr);
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});
+ int64_t Offset;
+
+ // To simplify the sub-register handling, verify that we only need to
+ // consider physical registers.
+ assert(MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs));
+
+ if (auto DestSrc = isCopyInstr(MI)) {
+ Register DestReg = DestSrc->Destination->getReg();
+
+ if (Reg == DestReg)
+ return ParamLoadedValue(*DestSrc->Source, Expr);
+
+ // Cases where super- or sub-registers need to be described should
+ // be handled by the target's hook implementation.
+ assert(!TRI->isSuperOrSubRegisterEq(Reg, DestReg) &&
+ "TargetInstrInfo::describeLoadedValue can't describe super- or "
+ "sub-regs for copy instructions");
+ return None;
+ } else if (auto RegImm = isAddImmediate(MI, Reg)) {
+ Register SrcReg = RegImm->Reg;
+ Offset = RegImm->Imm;
+ Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, Offset);
+ return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
+ } else if (MI.hasOneMemOperand()) {
+ // Only describe memory which provably does not escape the function. As
+ // described in llvm.org/PR43343, escaped memory may be clobbered by the
+ // callee (or by another thread).
+ const auto &TII = MF->getSubtarget().getInstrInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ const MachineMemOperand *MMO = MI.memoperands()[0];
+ const PseudoSourceValue *PSV = MMO->getPseudoValue();
+
+ // If the address points to "special" memory (e.g. a spill slot), it's
+ // sufficient to check that it isn't aliased by any high-level IR value.
+ if (!PSV || PSV->mayAlias(&MFI))
+ return None;
+
+ const MachineOperand *BaseOp;
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+ return None;
+
+ assert(MI.getNumExplicitDefs() == 1 &&
+ "Can currently only handle mem instructions with a single define");
+
+ // TODO: In what way do we need to take Reg into consideration here?
+
+ SmallVector<uint64_t, 8> Ops;
+ DIExpression::appendOffset(Ops, Offset);
+ Ops.push_back(dwarf::DW_OP_deref_size);
+ Ops.push_back(MMO->getSize());
+ Expr = DIExpression::prependOpcodes(Expr, Ops);
+ return ParamLoadedValue(*BaseOp, Expr);
}
return None;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 9b23012f47e3..e5a7b70d82c8 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -88,6 +88,14 @@ static cl::opt<unsigned> OptsizeJumpTableDensity(
cl::desc("Minimum density for building a jump table in "
"an optsize function"));
+// FIXME: This option only exists to test whether strict fp operations are
+// processed correctly, by preventing strict fp operations from being mutated
+// to normal fp operations during development. Once the backend supports
+// strict float operations, this option will be meaningless.
+static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation",
+ cl::desc("Don't mutate strict-float node to a legalize node"),
+ cl::init(false), cl::Hidden);
+
static bool darwinHasSinCos(const Triple &TT) {
assert(TT.isOSDarwin() && "should be called with darwin triple");
// Don't bother with 32 bit x86.
@@ -148,7 +156,6 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::OLE_F128, "__lekf2");
setLibcallName(RTLIB::OGT_F128, "__gtkf2");
setLibcallName(RTLIB::UO_F128, "__unordkf2");
- setLibcallName(RTLIB::O_F128, "__unordkf2");
}
// A few names are different on particular architectures or environments.
@@ -556,10 +563,6 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
CCs[RTLIB::UO_F64] = ISD::SETNE;
CCs[RTLIB::UO_F128] = ISD::SETNE;
CCs[RTLIB::UO_PPCF128] = ISD::SETNE;
- CCs[RTLIB::O_F32] = ISD::SETEQ;
- CCs[RTLIB::O_F64] = ISD::SETEQ;
- CCs[RTLIB::O_F128] = ISD::SETEQ;
- CCs[RTLIB::O_PPCF128] = ISD::SETEQ;
}
/// NOTE: The TargetMachine owns TLOF.
@@ -572,8 +575,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
MaxGluedStoresPerMemcpy = 0;
MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize =
MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4;
- UseUnderscoreSetJmp = false;
- UseUnderscoreLongJmp = false;
HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
JumpIsExpensive = JumpIsExpensiveOverride;
@@ -585,6 +586,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
BooleanVectorContents = UndefinedBooleanContent;
SchedPreferenceInfo = Sched::ILP;
GatherAllAliasesMaxDepth = 18;
+ IsStrictFPEnabled = DisableStrictNodeMutation;
// TODO: the default will be switched to 0 in the next commit, along
// with the Target-specific changes necessary.
MaxAtomicSizeInBitsSupported = 1024;
@@ -624,6 +626,8 @@ void TargetLoweringBase::initActions() {
IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
setIndexedLoadAction(IM, VT, Expand);
setIndexedStoreAction(IM, VT, Expand);
+ setIndexedMaskedLoadAction(IM, VT, Expand);
+ setIndexedMaskedStoreAction(IM, VT, Expand);
}
// Most backends expect to see the node which just returns the value loaded.
@@ -654,6 +658,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMULFIXSAT, VT, Expand);
setOperationAction(ISD::UMULFIX, VT, Expand);
setOperationAction(ISD::UMULFIXSAT, VT, Expand);
+ setOperationAction(ISD::SDIVFIX, VT, Expand);
+ setOperationAction(ISD::UDIVFIX, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -687,6 +693,7 @@ void TargetLoweringBase::initActions() {
// These operations default to expand for vector types.
if (VT.isVector()) {
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
@@ -694,38 +701,9 @@ void TargetLoweringBase::initActions() {
}
// Constrained floating-point operations default to expand.
- setOperationAction(ISD::STRICT_FADD, VT, Expand);
- setOperationAction(ISD::STRICT_FSUB, VT, Expand);
- setOperationAction(ISD::STRICT_FMUL, VT, Expand);
- setOperationAction(ISD::STRICT_FDIV, VT, Expand);
- setOperationAction(ISD::STRICT_FREM, VT, Expand);
- setOperationAction(ISD::STRICT_FMA, VT, Expand);
- setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
- setOperationAction(ISD::STRICT_FPOW, VT, Expand);
- setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
- setOperationAction(ISD::STRICT_FSIN, VT, Expand);
- setOperationAction(ISD::STRICT_FCOS, VT, Expand);
- setOperationAction(ISD::STRICT_FEXP, VT, Expand);
- setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
- setOperationAction(ISD::STRICT_FLOG, VT, Expand);
- setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
- setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
- setOperationAction(ISD::STRICT_LRINT, VT, Expand);
- setOperationAction(ISD::STRICT_LLRINT, VT, Expand);
- setOperationAction(ISD::STRICT_FRINT, VT, Expand);
- setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
- setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
- setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
- setOperationAction(ISD::STRICT_LROUND, VT, Expand);
- setOperationAction(ISD::STRICT_LLROUND, VT, Expand);
- setOperationAction(ISD::STRICT_FROUND, VT, Expand);
- setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
- setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
- setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
- setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
- setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
- setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand);
- setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand);
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
+ setOperationAction(ISD::STRICT_##DAGN, VT, Expand);
+#include "llvm/IR/ConstrainedOps.def"
// For most targets @llvm.get.dynamic.area.offset just returns 0.
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
@@ -1332,8 +1310,11 @@ void TargetLoweringBase::computeRegisterProperties(
MVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
- NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
+ unsigned NumRegisters = getVectorTypeBreakdownMVT(VT, IntermediateVT,
NumIntermediates, RegisterVT, this);
+ NumRegistersForVT[i] = NumRegisters;
+ assert(NumRegistersForVT[i] == NumRegisters &&
+ "NumRegistersForVT size cannot represent NumRegisters!");
RegisterTypeForVT[i] = RegisterVT;
MVT NVT = VT.getPow2VectorType();
@@ -1456,6 +1437,28 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
return NumVectorRegs;
}
+bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI,
+ uint64_t NumCases,
+ uint64_t Range,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) const {
+ // FIXME: This function checks the maximum table size and density, but the
+ // minimum size is not checked. It would be nice if the minimum size check
+ // were also combined within this function. Currently, the minimum size
+ // check is performed in findJumpTables() in SelectionDAGBuilder and
+ // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
+ const bool OptForSize =
+ SI->getParent()->getParent()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI);
+ const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
+ const unsigned MaxJumpTableSize = getMaximumJumpTableSize();
+
+ // Check whether the number of cases is small enough and
+ // the range is dense enough for a jump table.
+ return (OptForSize || Range <= MaxJumpTableSize) &&
+ (NumCases * 100 >= Range * MinDensity);
+}
+
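
As a worked instance of this predicate (hedged, with made-up numbers): 40 cases over a range of 100 with MinDensity = 40 qualify, since 40 * 100 >= 100 * 40, while the same 40 cases spread over a range of 2000 do not.

#include <cassert>
#include <cstdint>

// Standalone restatement of the suitability check above.
bool suitableForJumpTable(uint64_t NumCases, uint64_t Range,
                          unsigned MinDensity, unsigned MaxJumpTableSize,
                          bool OptForSize) {
  return (OptForSize || Range <= MaxJumpTableSize) &&
         (NumCases * 100 >= Range * MinDensity);
}

int main() {
  assert(suitableForJumpTable(40, 100, 40, 4096, false));   // dense enough
  assert(!suitableForJumpTable(40, 2000, 40, 4096, false)); // too sparse
  return 0;
}
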
/// Get the EVTs and ArgFlags collections that represent the legalized return
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
@@ -1641,6 +1644,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case ExtractValue: return ISD::MERGE_VALUES;
case InsertValue: return ISD::MERGE_VALUES;
case LandingPad: return 0;
+ case Freeze: return 0;
}
llvm_unreachable("Unknown instruction type encountered!");
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 4978f4b9500b..8cb9814300d1 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -273,7 +273,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
Streamer.SwitchSection(S);
- for (const auto &Operand : LinkerOptions->operands()) {
+ for (const auto *Operand : LinkerOptions->operands()) {
if (cast<MDNode>(Operand)->getNumOperands() != 2)
report_fatal_error("invalid llvm.linker.options");
for (const auto &Option : cast<MDNode>(Operand)->operands()) {
@@ -289,7 +289,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
Streamer.SwitchSection(S);
- for (const auto &Operand : DependentLibraries->operands()) {
+ for (const auto *Operand : DependentLibraries->operands()) {
Streamer.EmitBytes(
cast<MDString>(cast<MDNode>(Operand)->getOperand(0))->getString());
Streamer.EmitIntValue(0, 1);
@@ -885,7 +885,7 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
Module &M) const {
// Emit the linker options if present.
if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) {
- for (const auto &Option : LinkerOptions->operands()) {
+ for (const auto *Option : LinkerOptions->operands()) {
SmallVector<std::string, 4> StrOptions;
for (const auto &Piece : cast<MDNode>(Option)->operands())
StrOptions.push_back(cast<MDString>(Piece)->getString());
@@ -1449,7 +1449,7 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
// linker.
MCSection *Sec = getDrectveSection();
Streamer.SwitchSection(Sec);
- for (const auto &Option : LinkerOptions->operands()) {
+ for (const auto *Option : LinkerOptions->operands()) {
for (const auto &Piece : cast<MDNode>(Option)->operands()) {
// Lead with a space for consistency with our dllexport implementation.
std::string Directive(" ");
@@ -1849,18 +1849,66 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
SC, Kind, /* BeginSymbolName */ nullptr);
}
+ if (Kind.isMergeableCString()) {
+ if (!Kind.isMergeable1ByteCString())
+ report_fatal_error("Unhandled multi-byte mergeable string kind.");
+
+ unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GO));
+
+ unsigned EntrySize = getEntrySizeForKind(Kind);
+ std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
+ SmallString<128> Name;
+ Name = SizeSpec + utostr(Align);
+
+ return getContext().getXCOFFSection(
+ Name, XCOFF::XMC_RO, XCOFF::XTY_SD,
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO),
+ Kind, /* BeginSymbolName */ nullptr);
+ }
+
if (Kind.isText())
return TextSection;
- if (Kind.isData())
+ if (Kind.isData() || Kind.isReadOnlyWithRel())
+ // TODO: We may put this under option control, because the user may want
+ // read-only data with relocations to be placed into a read-only section
+ // by the compiler.
+ return DataSection;
+
+ // Zero-initialized data must be emitted to the .data section because external
+ // linkage control sections that get mapped to the .bss section will be linked
+ // as tentative definitions, which is only appropriate for SectionKind::Common.
+ if (Kind.isBSS())
return DataSection;
+ if (Kind.isReadOnly())
+ return ReadOnlySection;
+
report_fatal_error("XCOFF other section types not yet implemented.");
}
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForJumpTable(
+ const Function &F, const TargetMachine &TM) const {
+ assert(!TM.getFunctionSections() &&
+        "Unique sections not supported on XCOFF yet.");
+ assert(!F.getComdat() && "Comdat not supported on XCOFF.");
+ // TODO: Enable emitting jump tables to unique sections when we support it.
+ return ReadOnlySection;
+}
+
bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(
bool UsesLabelDifference, const Function &F) const {
- report_fatal_error("TLOF XCOFF not yet implemented.");
+ return false;
+}
+
+/// Given a mergeable constant with the specified size and relocation
+/// information, return a section that it should be placed in.
+MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C,
+ unsigned &Align) const {
+ // TODO: Enable emitting constant pools to unique sections when we support it.
+ return ReadOnlySection;
}
void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx,
@@ -1891,6 +1939,7 @@ XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(
const GlobalObject *GO) {
switch (GO->getLinkage()) {
case GlobalValue::InternalLinkage:
+ case GlobalValue::PrivateLinkage:
return XCOFF::C_HIDEXT;
case GlobalValue::ExternalLinkage:
case GlobalValue::CommonLinkage:
diff --git a/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 039748d817ca..d794a261ecb2 100644
--- a/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -28,20 +28,8 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
const Function &F = MF.getFunction();
- // TODO: Remove support for old `fp elim` function attributes after fully
- // migrate to use "frame-pointer"
- if (!F.hasFnAttribute("frame-pointer")) {
- // Check to see if we should eliminate all frame pointers.
- if (F.getFnAttribute("no-frame-pointer-elim").getValueAsString() == "true")
- return true;
-
- // Check to see if we should eliminate non-leaf frame pointers.
- if (F.hasFnAttribute("no-frame-pointer-elim-non-leaf"))
- return MF.getFrameInfo().hasCalls();
-
+ if (!F.hasFnAttribute("frame-pointer"))
return false;
- }
-
StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString();
if (FP == "all")
return true;
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index f1f4f65adf7c..41cb511ad9b4 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Pass.h"
@@ -38,8 +39,8 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Threading.h"
#include "llvm/Support/SaveAndRestore.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
@@ -178,10 +179,10 @@ static cl::opt<CFLAAType> UseCFLAA(
/// Option names for limiting the codegen pipeline.
/// Those are used in error reporting and we didn't want
/// to duplicate their names all over the place.
-static const char *StartAfterOptName = "start-after";
-static const char *StartBeforeOptName = "start-before";
-static const char *StopAfterOptName = "stop-after";
-static const char *StopBeforeOptName = "stop-before";
+static const char StartAfterOptName[] = "start-after";
+static const char StartBeforeOptName[] = "start-before";
+static const char StopAfterOptName[] = "stop-after";
+static const char StopBeforeOptName[] = "stop-before";
static cl::opt<std::string>
StartAfterOpt(StringRef(StartAfterOptName),
diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index 59eb2f9c88cb..63766df4d2be 100644
--- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -54,6 +54,10 @@ bool TargetSubtargetInfo::enablePostRAScheduler() const {
return getSchedModel().PostRAScheduler;
}
+bool TargetSubtargetInfo::enablePostRAMachineScheduler() const {
+ return enableMachineScheduler() && enablePostRAScheduler();
+}
+
bool TargetSubtargetInfo::useAA() const {
return false;
}
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index ea971809d4e4..2b1ffab74b6f 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1287,7 +1287,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
// If the instruction is convertible to 3 Addr, instead
- // of returning try 3 Addr transformation aggresively and
+ // of returning try 3 Addr transformation aggressively and
// use this variable to check later. Because it might be better.
// For example, we can just use `leal (%rsi,%rdi), %eax` and `ret`
// instead of the following code.
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
new file mode 100644
index 000000000000..4522484222f5
--- /dev/null
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -0,0 +1,1011 @@
+//===----- TypePromotion.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This is an opcode-based type promotion pass for small types that would
+/// otherwise be promoted during legalisation. This works around the
+/// limitations of SelectionDAG for cyclic regions. The search begins from the
+/// operands of icmp instructions, where a tree, consisting of non-wrapping or
+/// safe-wrapping instructions, is built, checked and promoted if possible.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+
+#define DEBUG_TYPE "type-promotion"
+#define PASS_NAME "Type Promotion"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false),
+ cl::desc("Disable type promotion pass"));
+
+// The goal of this pass is to enable more efficient code generation for
+// operations on narrow types (i.e. types narrower than 32 bits), and this is
+// a motivating IR code example:
+//
+// define hidden i32 @cmp(i8 zeroext) {
+// %2 = add i8 %0, -49
+// %3 = icmp ult i8 %2, 3
+// ..
+// }
+//
+// The issue here is that i8 is type-legalized to i32 because i8 is not a
+// legal type. Thus, arithmetic is done at 32-bit precision, but then the
+// byte value is masked out as follows:
+//
+// t19: i32 = add t4, Constant:i32<-49>
+// t24: i32 = and t19, Constant:i32<255>
+//
+// Consequently, we generate code like this:
+//
+// subs r0, #49
+// uxtb r1, r0
+// cmp r1, #3
+//
+// This shows that masking out the byte value results in generation of
+// the UXTB instruction. This is not optimal as r0 already contains the byte
+// value we need, and so instead we can just generate:
+//
+// sub.w r1, r0, #49
+// cmp r1, #3
+//
+// We achieve this by type promoting the IR to i32 like so for this example:
+//
+// define i32 @cmp(i8 zeroext %c) {
+// %0 = zext i8 %c to i32
+// %c.off = add i32 %0, -49
+// %1 = icmp ult i32 %c.off, 3
+// ..
+// }
+//
+// For this to be valid and legal, we need to prove that the i32 add is
+// producing the same value as the i8 addition, and that e.g. no overflow
+// happens.
+//
+// A brief sketch of the algorithm and some terminology.
+// We pattern match interesting IR patterns, which have:
+// - "sources": instructions producing narrow values (i8, i16), and
+// - "sinks": instructions consuming these narrow values.
+//
+// We collect all instructions connecting sources and sinks in a worklist, so
+// that we can mutate these instructions and perform type promotion when it is
+// legal to do so.
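+//
+// As an illustrative sketch (not part of the original description): in the
+// motivating @cmp function above, the zeroext i8 argument is a source, the
+// icmp is a sink, and the add between them is the instruction whose type is
+// mutated from i8 to i32.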
+
+namespace {
+class IRPromoter {
+ LLVMContext &Ctx;
+ IntegerType *OrigTy = nullptr;
+ unsigned PromotedWidth = 0;
+ SetVector<Value*> &Visited;
+ SetVector<Value*> &Sources;
+ SetVector<Instruction*> &Sinks;
+ SmallVectorImpl<Instruction*> &SafeWrap;
+ IntegerType *ExtTy = nullptr;
+ SmallPtrSet<Value*, 8> NewInsts;
+ SmallPtrSet<Instruction*, 4> InstsToRemove;
+ DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap;
+ SmallPtrSet<Value*, 8> Promoted;
+
+ void ReplaceAllUsersOfWith(Value *From, Value *To);
+  void PrepareWrappingAdds();
+  void ExtendSources();
+  void ConvertTruncs();
+  void PromoteTree();
+  void TruncateSinks();
+  void Cleanup();
+
+public:
+ IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width,
+ SetVector<Value*> &visited, SetVector<Value*> &sources,
+ SetVector<Instruction*> &sinks,
+ SmallVectorImpl<Instruction*> &wrap) :
+ Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited),
+ Sources(sources), Sinks(sinks), SafeWrap(wrap) {
+ ExtTy = IntegerType::get(Ctx, PromotedWidth);
+ assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits()
+ && "Original type not smaller than extended type");
+ }
+
+ void Mutate();
+};
+
+class TypePromotion : public FunctionPass {
+ unsigned TypeSize = 0;
+ LLVMContext *Ctx = nullptr;
+ unsigned RegisterBitWidth = 0;
+ SmallPtrSet<Value*, 16> AllVisited;
+ SmallPtrSet<Instruction*, 8> SafeToPromote;
+ SmallVector<Instruction*, 4> SafeWrap;
+
+  // Does V have the same size result type as TypeSize?
+  bool EqualTypeSize(Value *V);
+  // Does V have the same size, or narrower, result type as TypeSize?
+  bool LessOrEqualTypeSize(Value *V);
+  // Does V have a result type that is wider than TypeSize?
+  bool GreaterThanTypeSize(Value *V);
+  // Does V have a result type that is narrower than TypeSize?
+  bool LessThanTypeSize(Value *V);
+  // Should V be a leaf in the promotion tree?
+  bool isSource(Value *V);
+  // Should V be a root in the promotion tree?
+  bool isSink(Value *V);
+  // Should we change the result type of V? It will result in the users of V
+  // being visited.
+  bool shouldPromote(Value *V);
+  // Is I an add or a sub, which isn't marked as nuw, but where a wrapping
+  // result won't affect the computation?
+  bool isSafeWrap(Instruction *I);
+  // Can V have its integer type promoted, or can the type be ignored?
+  bool isSupportedType(Value *V);
+  // Is V an instruction with a supported opcode, or another value that we can
+  // handle, such as constants and basic blocks?
+  bool isSupportedValue(Value *V);
+  // Is V an instruction whose result can trivially be promoted, or that has
+  // safe wrapping?
+ bool isLegalToPromote(Value *V);
+ bool TryToPromote(Value *V, unsigned PromotedWidth);
+
+public:
+ static char ID;
+
+ TypePromotion() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ }
+
+ StringRef getPassName() const override { return PASS_NAME; }
+
+ bool runOnFunction(Function &F) override;
+};
+
+} // namespace
+
+static bool GenerateSignBits(Value *V) {
+ if (!isa<Instruction>(V))
+ return false;
+
+ unsigned Opc = cast<Instruction>(V)->getOpcode();
+ return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
+ Opc == Instruction::SRem || Opc == Instruction::SExt;
+}
+
+bool TypePromotion::EqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() == TypeSize;
+}
+
+bool TypePromotion::LessOrEqualTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() <= TypeSize;
+}
+
+bool TypePromotion::GreaterThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() > TypeSize;
+}
+
+bool TypePromotion::LessThanTypeSize(Value *V) {
+ return V->getType()->getScalarSizeInBits() < TypeSize;
+}
+
+/// Return true if the given value is a source in the use-def chain, producing
+/// a narrow 'TypeSize' value. These values will be zero extended to start the
+/// promotion of the tree to i32. We guarantee that these won't populate the
+/// upper bits of the register. The zext of a load will be free, as will call
+/// return values, because we only accept calls that guarantee a zeroext
+/// return value. Many arguments will have the zeroext attribute too, so those
+/// would be free as well.
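+///
+/// For example (an illustrative note, not from the original comment): an i8
+/// load, a zeroext i8 argument, or a call returning a zeroext i8 would all be
+/// treated as sources.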
+bool TypePromotion::isSource(Value *V) {
+ if (!isa<IntegerType>(V->getType()))
+ return false;
+
+ // TODO Allow zext to be sources.
+  if (isa<Argument>(V) || isa<LoadInst>(V) || isa<BitCastInst>(V))
+    return true;
+  if (auto *Call = dyn_cast<CallInst>(V))
+    return Call->hasRetAttr(Attribute::AttrKind::ZExt);
+  if (auto *Trunc = dyn_cast<TruncInst>(V))
+    return EqualTypeSize(Trunc);
+  return false;
+}
+
+/// Return true if V will require any promoted values to be truncated for the
+/// IR to remain valid. We can't mutate the value type of these instructions.
+bool TypePromotion::isSink(Value *V) {
+  // TODO The truncate also isn't actually necessary because we would have
+  // already proved that the data value is kept within the range of the
+  // original data type.
+
+ // Sinks are:
+ // - points where the value in the register is being observed, such as an
+ // icmp, switch or store.
+ // - points where value types have to match, such as calls and returns.
+ // - zext are included to ease the transformation and are generally removed
+ // later on.
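+  //
+  // For example (illustrative): in 'store i8 %x, i8* %p', the narrow value %x
+  // is observed by the store, so the store is a sink and must keep its i8
+  // operand type.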
+ if (auto *Store = dyn_cast<StoreInst>(V))
+ return LessOrEqualTypeSize(Store->getValueOperand());
+ if (auto *Return = dyn_cast<ReturnInst>(V))
+ return LessOrEqualTypeSize(Return->getReturnValue());
+ if (auto *ZExt = dyn_cast<ZExtInst>(V))
+ return GreaterThanTypeSize(ZExt);
+ if (auto *Switch = dyn_cast<SwitchInst>(V))
+ return LessThanTypeSize(Switch->getCondition());
+ if (auto *ICmp = dyn_cast<ICmpInst>(V))
+ return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0));
+
+ return isa<CallInst>(V);
+}
+
+/// Return whether this instruction can safely wrap.
+bool TypePromotion::isSafeWrap(Instruction *I) {
+ // We can support a, potentially, wrapping instruction (I) if:
+ // - It is only used by an unsigned icmp.
+ // - The icmp uses a constant.
+  // - The wrapping value (I) is decreasing, i.e. would underflow, wrapping
+  // around zero to become a larger number than before.
+ // - The wrapping instruction (I) also uses a constant.
+ //
+  // We can then use the two constants to calculate whether the result would
+  // wrap with respect to itself in the original bitwidth. If it doesn't wrap,
+  // but just underflows the range, the icmp would give the same result whether
+  // the result has been truncated or not. We calculate this by:
+  // - Zero extending both constants, if needed, to 32 bits.
+  // - Taking the absolute value of I's constant and adding it to the icmp const.
+  // - Checking that this value is not out of range for the small type. If it
+  // is, it means that it has underflowed enough to wrap around the icmp
+  // constant.
+ //
+ // For example:
+ //
+ // %sub = sub i8 %a, 2
+ // %cmp = icmp ule i8 %sub, 254
+ //
+ // If %a = 0, %sub = -2 == FE == 254
+  // But if this is evaluated as an i32
+ // %sub = -2 == FF FF FF FE == 4294967294
+ // So the unsigned compares (i8 and i32) would not yield the same result.
+ //
+ // Another way to look at it is:
+ // %a - 2 <= 254
+ // %a + 2 <= 254 + 2
+ // %a <= 256
+ // And we can't represent 256 in the i8 format, so we don't support it.
+ //
+ // Whereas:
+ //
+  // %sub = sub i8 %a, 1
+ // %cmp = icmp ule i8 %sub, 254
+ //
+ // If %a = 0, %sub = -1 == FF == 255
+ // As i32:
+ // %sub = -1 == FF FF FF FF == 4294967295
+ //
+ // In this case, the unsigned compare results would be the same and this
+ // would also be true for ult, uge and ugt:
+ // - (255 < 254) == (0xFFFFFFFF < 254) == false
+ // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
+ // - (255 > 254) == (0xFFFFFFFF > 254) == true
+ // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
+ //
+ // To demonstrate why we can't handle increasing values:
+ //
+ // %add = add i8 %a, 2
+ // %cmp = icmp ult i8 %add, 127
+ //
+ // If %a = 254, %add = 256 == (i8 1)
+ // As i32:
+ // %add = 256
+ //
+ // (1 < 127) != (256 < 127)
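+  //
+  // Concretely, in terms of the check below (an illustrative walkthrough of
+  // the arithmetic, not from the original comment): for the first example,
+  // OverflowConst = 2 and ICmpConst = 254, so Total = 254 + |2| = 256 while
+  // Max = 0xFF = 255 for i8; 256 ugt 255, so we reject. For the second
+  // example, Total = 254 + |1| = 255 <= 255, so the wrap is safe.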
+
+ unsigned Opc = I->getOpcode();
+ if (Opc != Instruction::Add && Opc != Instruction::Sub)
+ return false;
+
+ if (!I->hasOneUse() ||
+ !isa<ICmpInst>(*I->user_begin()) ||
+ !isa<ConstantInt>(I->getOperand(1)))
+ return false;
+
+ ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1));
+ bool NegImm = OverflowConst->isNegative();
+ bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
+ ((Opc == Instruction::Add) && NegImm);
+ if (!IsDecreasing)
+ return false;
+
+ // Don't support an icmp that deals with sign bits.
+ auto *CI = cast<ICmpInst>(*I->user_begin());
+ if (CI->isSigned() || CI->isEquality())
+ return false;
+
+ ConstantInt *ICmpConst = nullptr;
+ if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
+ ICmpConst = Const;
+ else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
+ ICmpConst = Const;
+ else
+ return false;
+
+ // Now check that the result can't wrap on itself.
+ APInt Total = ICmpConst->getValue().getBitWidth() < 32 ?
+ ICmpConst->getValue().zext(32) : ICmpConst->getValue();
+
+ Total += OverflowConst->getValue().getBitWidth() < 32 ?
+ OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs();
+
+ APInt Max = APInt::getAllOnesValue(TypePromotion::TypeSize);
+
+ if (Total.getBitWidth() > Max.getBitWidth()) {
+ if (Total.ugt(Max.zext(Total.getBitWidth())))
+ return false;
+ } else if (Max.getBitWidth() > Total.getBitWidth()) {
+ if (Total.zext(Max.getBitWidth()).ugt(Max))
+ return false;
+ } else if (Total.ugt(Max))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for "
+ << *I << "\n");
+ SafeWrap.push_back(I);
+ return true;
+}
+
+bool TypePromotion::shouldPromote(Value *V) {
+ if (!isa<IntegerType>(V->getType()) || isSink(V))
+ return false;
+
+ if (isSource(V))
+ return true;
+
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ if (isa<ICmpInst>(I))
+ return false;
+
+ return true;
+}
+
+/// Return whether we can safely mutate V's type to ExtTy without having to be
+/// concerned with zero extending or truncation.
+static bool isPromotedResultSafe(Value *V) {
+ if (GenerateSignBits(V))
+ return false;
+
+ if (!isa<Instruction>(V))
+ return true;
+
+ if (!isa<OverflowingBinaryOperator>(V))
+ return true;
+
+ return cast<Instruction>(V)->hasNoUnsignedWrap();
+}
+
+void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
+ SmallVector<Instruction*, 4> Users;
+ Instruction *InstTo = dyn_cast<Instruction>(To);
+ bool ReplacedAll = true;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To
+ << "\n");
+
+ for (Use &U : From->uses()) {
+ auto *User = cast<Instruction>(U.getUser());
+ if (InstTo && User->isIdenticalTo(InstTo)) {
+ ReplacedAll = false;
+ continue;
+ }
+ Users.push_back(User);
+ }
+
+ for (auto *U : Users)
+ U->replaceUsesOfWith(From, To);
+
+ if (ReplacedAll)
+ if (auto *I = dyn_cast<Instruction>(From))
+ InstsToRemove.insert(I);
+}
+
+void IRPromoter::PrepareWrappingAdds() {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n");
+ IRBuilder<> Builder{Ctx};
+
+ // For adds that safely wrap and use a negative immediate as operand 1, we
+ // create an equivalent instruction using a positive immediate.
+ // That positive immediate can then be zext along with all the other
+ // immediates later.
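+  //
+  // For example (an illustrative sketch, reusing the motivating function from
+  // the file comment; the replacement keeps the same uses):
+  //   %2 = add i8 %0, -49   -->   %2 = sub i8 %0, 49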
+ for (auto *I : SafeWrap) {
+ if (I->getOpcode() != Instruction::Add)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n");
+ assert((isa<ConstantInt>(I->getOperand(1)) &&
+ cast<ConstantInt>(I->getOperand(1))->isNegative()) &&
+ "Wrapping should have a negative immediate as the second operand");
+
+ auto Const = cast<ConstantInt>(I->getOperand(1));
+ auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
+ Builder.SetInsertPoint(I);
+ Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst);
+ if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
+ NewInst->copyIRFlags(I);
+ NewInsts.insert(NewInst);
+ }
+ InstsToRemove.insert(I);
+ I->replaceAllUsesWith(NewVal);
+ LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n");
+ }
+ for (auto *I : NewInsts)
+ Visited.insert(I);
+}
+
+void IRPromoter::ExtendSources() {
+ IRBuilder<> Builder{Ctx};
+
+ auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
+ assert(V->getType() != ExtTy && "zext already extends to i32");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V << "\n");
+ Builder.SetInsertPoint(InsertPt);
+ if (auto *I = dyn_cast<Instruction>(V))
+ Builder.SetCurrentDebugLocation(I->getDebugLoc());
+
+ Value *ZExt = Builder.CreateZExt(V, ExtTy);
+ if (auto *I = dyn_cast<Instruction>(ZExt)) {
+ if (isa<Argument>(V))
+ I->moveBefore(InsertPt);
+ else
+ I->moveAfter(InsertPt);
+ NewInsts.insert(I);
+ }
+
+ ReplaceAllUsersOfWith(V, ZExt);
+ };
+
+ // Now, insert extending instructions between the sources and their users.
+ LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n");
+ for (auto V : Sources) {
+ LLVM_DEBUG(dbgs() << " - " << *V << "\n");
+ if (auto *I = dyn_cast<Instruction>(V))
+ InsertZExt(I, I);
+ else if (auto *Arg = dyn_cast<Argument>(V)) {
+ BasicBlock &BB = Arg->getParent()->front();
+ InsertZExt(Arg, &*BB.getFirstInsertionPt());
+ } else {
+ llvm_unreachable("unhandled source that needs extending");
+ }
+ Promoted.insert(V);
+ }
+}
+
+void IRPromoter::PromoteTree() {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
+
+ IRBuilder<> Builder{Ctx};
+
+ // Mutate the types of the instructions within the tree. Here we handle
+ // constant operands.
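+  //
+  // For example (illustrative, hypothetical IR): a binary operator in the
+  // tree has its result type mutated in place and its constant operand zero
+  // extended:
+  //   %x = and i8 %a, 3   -->   %x = and i32 %a, 3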
+ for (auto *V : Visited) {
+ if (Sources.count(V))
+ continue;
+
+ auto *I = cast<Instruction>(V);
+ if (Sinks.count(I))
+ continue;
+
+ for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
+ Value *Op = I->getOperand(i);
+ if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType()))
+ continue;
+
+ if (auto *Const = dyn_cast<ConstantInt>(Op)) {
+ Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy);
+ I->setOperand(i, NewConst);
+ } else if (isa<UndefValue>(Op))
+ I->setOperand(i, UndefValue::get(ExtTy));
+ }
+
+ // Mutate the result type, unless this is an icmp.
+ if (!isa<ICmpInst>(I)) {
+ I->mutateType(ExtTy);
+ Promoted.insert(I);
+ }
+ }
+}
+
+void IRPromoter::TruncateSinks() {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n");
+
+ IRBuilder<> Builder{Ctx};
+
+ auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* {
+ if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
+ return nullptr;
+
+ if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources.count(V))
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for "
+ << *V << "\n");
+ Builder.SetInsertPoint(cast<Instruction>(V));
+ auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
+ if (Trunc)
+ NewInsts.insert(Trunc);
+ return Trunc;
+ };
+
+ // Fix up any stores or returns that use the results of the promoted
+ // chain.
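+  //
+  // For example (illustrative, hypothetical IR): a store sink of a promoted
+  // value gets a new trunc back to the cached original type:
+  //   store i8 %x, i8* %p   -->   %t = trunc i32 %x to i8
+  //                               store i8 %t, i8* %p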
+ for (auto I : Sinks) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n");
+
+ // Handle calls separately as we need to iterate over arg operands.
+ if (auto *Call = dyn_cast<CallInst>(I)) {
+ for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+ Value *Arg = Call->getArgOperand(i);
+ Type *Ty = TruncTysMap[Call][i];
+ if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
+ Trunc->moveBefore(Call);
+ Call->setArgOperand(i, Trunc);
+ }
+ }
+ continue;
+ }
+
+ // Special case switches because we need to truncate the condition.
+ if (auto *Switch = dyn_cast<SwitchInst>(I)) {
+ Type *Ty = TruncTysMap[Switch][0];
+ if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) {
+ Trunc->moveBefore(Switch);
+ Switch->setCondition(Trunc);
+ }
+ continue;
+ }
+
+ // Now handle the others.
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Type *Ty = TruncTysMap[I][i];
+ if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) {
+ Trunc->moveBefore(I);
+ I->setOperand(i, Trunc);
+ }
+ }
+ }
+}
+
+void IRPromoter::Cleanup() {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
+  // Some zexts will now have become redundant, along with their trunc
+  // operands, so remove them.
+ for (auto V : Visited) {
+ if (!isa<ZExtInst>(V))
+ continue;
+
+ auto ZExt = cast<ZExtInst>(V);
+ if (ZExt->getDestTy() != ExtTy)
+ continue;
+
+ Value *Src = ZExt->getOperand(0);
+ if (ZExt->getSrcTy() == ZExt->getDestTy()) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt
+ << "\n");
+ ReplaceAllUsersOfWith(ZExt, Src);
+ continue;
+ }
+
+ // Unless they produce a value that is narrower than ExtTy, we can
+ // replace the result of the zext with the input of a newly inserted
+ // trunc.
+ if (NewInsts.count(Src) && isa<TruncInst>(Src) &&
+ Src->getType() == OrigTy) {
+ auto *Trunc = cast<TruncInst>(Src);
+ assert(Trunc->getOperand(0)->getType() == ExtTy &&
+ "expected inserted trunc to be operating on i32");
+ ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
+ }
+ }
+
+ for (auto *I : InstsToRemove) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n");
+ I->dropAllReferences();
+ I->eraseFromParent();
+ }
+}
+
+void IRPromoter::ConvertTruncs() {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n");
+ IRBuilder<> Builder{Ctx};
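+
+  // For example (illustrative, hypothetical IR): once the tree operates on
+  // i32, a trunc that isn't a source is replaced by an equivalent mask:
+  //   %t = trunc i32 %x to i8   -->   %t = and i32 %x, 255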
+
+ for (auto *V : Visited) {
+ if (!isa<TruncInst>(V) || Sources.count(V))
+ continue;
+
+ auto *Trunc = cast<TruncInst>(V);
+ Builder.SetInsertPoint(Trunc);
+ IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType());
+ IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]);
+
+ unsigned NumBits = DestTy->getScalarSizeInBits();
+ ConstantInt *Mask =
+ ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
+ Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
+
+ if (auto *I = dyn_cast<Instruction>(Masked))
+ NewInsts.insert(I);
+
+ ReplaceAllUsersOfWith(Trunc, Masked);
+ }
+}
+
+void IRPromoter::Mutate() {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains from "
+ << OrigTy->getBitWidth() << " to " << PromotedWidth << "-bits\n");
+
+  // Cache the original types of the values that will likely need truncating.
+ for (auto *I : Sinks) {
+ if (auto *Call = dyn_cast<CallInst>(I)) {
+ for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+ Value *Arg = Call->getArgOperand(i);
+ TruncTysMap[Call].push_back(Arg->getType());
+ }
+ } else if (auto *Switch = dyn_cast<SwitchInst>(I))
+ TruncTysMap[I].push_back(Switch->getCondition()->getType());
+ else {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ TruncTysMap[I].push_back(I->getOperand(i)->getType());
+ }
+ }
+ for (auto *V : Visited) {
+ if (!isa<TruncInst>(V) || Sources.count(V))
+ continue;
+ auto *Trunc = cast<TruncInst>(V);
+ TruncTysMap[Trunc].push_back(Trunc->getDestTy());
+ }
+
+ // Convert adds using negative immediates to equivalent instructions that use
+ // positive constants.
+ PrepareWrappingAdds();
+
+ // Insert zext instructions between sources and their users.
+ ExtendSources();
+
+ // Promote visited instructions, mutating their types in place.
+ PromoteTree();
+
+  // Convert any truncs that aren't sources into AND masks.
+ ConvertTruncs();
+
+ // Insert trunc instructions for use by calls, stores etc...
+ TruncateSinks();
+
+  // Finally, remove unnecessary zexts and truncs, delete old instructions and
+  // clear the data structures.
+ Cleanup();
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n");
+}
+
+/// We disallow booleans to make life easier when dealing with icmps but allow
+/// any other integer that fits in a scalar register. Void types are accepted
+/// so we can handle switches.
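+///
+/// For example (an illustrative note, assuming a 32-bit register width): i8
+/// and i16 values are supported, while i1 (boolean) and i64 values are not.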
+bool TypePromotion::isSupportedType(Value *V) {
+ Type *Ty = V->getType();
+
+  // Allow voids and pointers; these won't be promoted.
+ if (Ty->isVoidTy() || Ty->isPointerTy())
+ return true;
+
+ if (!isa<IntegerType>(Ty) ||
+ cast<IntegerType>(Ty)->getBitWidth() == 1 ||
+ cast<IntegerType>(Ty)->getBitWidth() > RegisterBitWidth)
+ return false;
+
+ return LessOrEqualTypeSize(V);
+}
+
+/// We accept most instructions, as well as Arguments and ConstantInts. We
+/// disallow casts other than zext and truncs, and only allow calls if their
+/// return value is zeroext. We don't allow opcodes that can introduce sign
+/// bits.
+bool TypePromotion::isSupportedValue(Value *V) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ switch (I->getOpcode()) {
+ default:
+ return isa<BinaryOperator>(I) && isSupportedType(I) &&
+ !GenerateSignBits(I);
+ case Instruction::GetElementPtr:
+ case Instruction::Store:
+ case Instruction::Br:
+ case Instruction::Switch:
+ return true;
+ case Instruction::PHI:
+ case Instruction::Select:
+ case Instruction::Ret:
+ case Instruction::Load:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ return isSupportedType(I);
+ case Instruction::ZExt:
+ return isSupportedType(I->getOperand(0));
+ case Instruction::ICmp:
+      // Now that we allow types smaller than TypeSize, only allow icmps of
+      // TypeSize because a smaller icmp would require a trunc to be legalised.
+ // TODO: Allow icmp of smaller types, and calculate at the end
+ // whether the transform would be beneficial.
+ if (isa<PointerType>(I->getOperand(0)->getType()))
+ return true;
+ return EqualTypeSize(I->getOperand(0));
+ case Instruction::Call: {
+      // Special cases for calls as we need to check for zeroext.
+ // TODO We should accept calls even if they don't have zeroext, as they
+ // can still be sinks.
+ auto *Call = cast<CallInst>(I);
+ return isSupportedType(Call) &&
+ Call->hasRetAttr(Attribute::AttrKind::ZExt);
+ }
+ }
+ } else if (isa<Constant>(V) && !isa<ConstantExpr>(V)) {
+ return isSupportedType(V);
+ } else if (isa<Argument>(V))
+ return isSupportedType(V);
+
+ return isa<BasicBlock>(V);
+}
+
+/// Return true if it is safe to promote V: either its result cannot wrap, or
+/// any wrapping it may do has been shown not to affect its users (isSafeWrap).
+bool TypePromotion::isLegalToPromote(Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return true;
+
+ if (SafeToPromote.count(I))
+ return true;
+
+ if (isPromotedResultSafe(V) || isSafeWrap(I)) {
+ SafeToPromote.insert(I);
+ return true;
+ }
+ return false;
+}
+
+bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
+ Type *OrigTy = V->getType();
+ TypeSize = OrigTy->getPrimitiveSizeInBits();
+ SafeToPromote.clear();
+ SafeWrap.clear();
+
+ if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
+ << TypeSize << " bits to " << PromotedWidth << "\n");
+
+ SetVector<Value*> WorkList;
+ SetVector<Value*> Sources;
+ SetVector<Instruction*> Sinks;
+ SetVector<Value*> CurrentVisited;
+ WorkList.insert(V);
+
+ // Return true if V was added to the worklist as a supported instruction,
+ // if it was already visited, or if we don't need to explore it (e.g.
+ // pointer values and GEPs), and false otherwise.
+ auto AddLegalInst = [&](Value *V) {
+ if (CurrentVisited.count(V))
+ return true;
+
+ // Ignore GEPs because they don't need promoting and the constant indices
+ // will prevent the transformation.
+ if (isa<GetElementPtrInst>(V))
+ return true;
+
+ if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n");
+ return false;
+ }
+
+ WorkList.insert(V);
+ return true;
+ };
+
+  // Iterate through, and add to, a tree of operands and users in the use-def
+  // chain.
+ while (!WorkList.empty()) {
+ Value *V = WorkList.back();
+ WorkList.pop_back();
+ if (CurrentVisited.count(V))
+ continue;
+
+ // Ignore non-instructions, other than arguments.
+ if (!isa<Instruction>(V) && !isSource(V))
+ continue;
+
+ // If we've already visited this value from somewhere, bail now because
+ // the tree has already been explored.
+    // TODO: This could limit the transform, i.e. if we first try to promote
+    // something from an i8 and fail, before trying an i16.
+ if (AllVisited.count(V))
+ return false;
+
+ CurrentVisited.insert(V);
+ AllVisited.insert(V);
+
+ // Calls can be both sources and sinks.
+ if (isSink(V))
+ Sinks.insert(cast<Instruction>(V));
+
+ if (isSource(V))
+ Sources.insert(V);
+
+ if (!isSink(V) && !isSource(V)) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // Visit operands of any instruction visited.
+ for (auto &U : I->operands()) {
+ if (!AddLegalInst(U))
+ return false;
+ }
+ }
+ }
+
+    // Don't visit users of a node which isn't going to be mutated unless it's
+    // a source.
+ if (isSource(V) || shouldPromote(V)) {
+ for (Use &U : V->uses()) {
+ if (!AddLegalInst(U.getUser()))
+ return false;
+ }
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Visited nodes:\n";
+ for (auto *I : CurrentVisited)
+ I->dump();
+ );
+
+ unsigned ToPromote = 0;
+ unsigned NonFreeArgs = 0;
+ SmallPtrSet<BasicBlock*, 4> Blocks;
+ for (auto *V : CurrentVisited) {
+ if (auto *I = dyn_cast<Instruction>(V))
+ Blocks.insert(I->getParent());
+
+ if (Sources.count(V)) {
+ if (auto *Arg = dyn_cast<Argument>(V))
+ if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr())
+ ++NonFreeArgs;
+ continue;
+ }
+
+ if (Sinks.count(cast<Instruction>(V)))
+ continue;
+ ++ToPromote;
+ }
+
+  // DAG optimisations should be able to handle these cases better, especially
+  // for function arguments.
+  if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size())))
+    return false;
+
+ IRPromoter Promoter(*Ctx, cast<IntegerType>(OrigTy), PromotedWidth,
+ CurrentVisited, Sources, Sinks, SafeWrap);
+ Promoter.Mutate();
+ return true;
+}
+
+bool TypePromotion::runOnFunction(Function &F) {
+ if (skipFunction(F) || DisablePromotion)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n");
+
+ auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ if (!TPC)
+ return false;
+
+ bool MadeChange = false;
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const TargetMachine &TM = TPC->getTM<TargetMachine>();
+ const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F);
+ const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
+  const TargetTransformInfo &TTI =
+      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  RegisterBitWidth = TTI.getRegisterBitWidth(false);
+ Ctx = &F.getParent()->getContext();
+
+ // Search up from icmps to try to promote their operands.
+ for (BasicBlock &BB : F) {
+ for (auto &I : BB) {
+ if (AllVisited.count(&I))
+ continue;
+
+ if (!isa<ICmpInst>(&I))
+ continue;
+
+ auto *ICmp = cast<ICmpInst>(&I);
+      // Skip signed or pointer compares.
+ if (ICmp->isSigned() ||
+ !isa<IntegerType>(ICmp->getOperand(0)->getType()))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n");
+
+ for (auto &Op : ICmp->operands()) {
+ if (auto *I = dyn_cast<Instruction>(Op)) {
+ EVT SrcVT = TLI->getValueType(DL, I->getType());
+ if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT()))
+ break;
+
+ if (TLI->getTypeAction(ICmp->getContext(), SrcVT) !=
+ TargetLowering::TypePromoteInteger)
+ break;
+
+ EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT);
+ if (RegisterBitWidth < PromotedVT.getSizeInBits()) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
+ << "for promoted type\n");
+ break;
+ }
+
+ MadeChange |= TryToPromote(I, PromotedVT.getSizeInBits());
+ break;
+ }
+ }
+ }
+ LLVM_DEBUG(if (verifyFunction(F, &dbgs())) {
+ dbgs() << F;
+ report_fatal_error("Broken function after type promotion");
+ });
+ }
+ if (MadeChange)
+ LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n");
+
+ return MadeChange;
+}
+
+INITIALIZE_PASS_BEGIN(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
+
+char TypePromotion::ID = 0;
+
+FunctionPass *llvm::createTypePromotionPass() {
+ return new TypePromotion();
+}
diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 3289eff71336..b770e1d94488 100644
--- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index 73b862d51c0f..41cbdf035558 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -11,6 +11,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TypeSize.h"
using namespace llvm;
EVT EVT::changeExtendedTypeToInteger() const {
@@ -101,12 +102,12 @@ unsigned EVT::getExtendedVectorNumElements() const {
return cast<VectorType>(LLVMTy)->getNumElements();
}
-unsigned EVT::getExtendedSizeInBits() const {
+TypeSize EVT::getExtendedSizeInBits() const {
assert(isExtended() && "Type is not extended!");
if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy))
- return ITy->getBitWidth();
+ return TypeSize::Fixed(ITy->getBitWidth());
if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
- return VTy->getBitWidth();
+ return VTy->getPrimitiveSizeInBits();
llvm_unreachable("Unrecognized extended type!");
}
@@ -119,139 +120,14 @@ std::string EVT::getEVTString() const {
+ getVectorElementType().getEVTString();
if (isInteger())
return "i" + utostr(getSizeInBits());
+ if (isFloatingPoint())
+ return "f" + utostr(getSizeInBits());
llvm_unreachable("Invalid EVT!");
- case MVT::i1: return "i1";
- case MVT::i8: return "i8";
- case MVT::i16: return "i16";
- case MVT::i32: return "i32";
- case MVT::i64: return "i64";
- case MVT::i128: return "i128";
- case MVT::f16: return "f16";
- case MVT::f32: return "f32";
- case MVT::f64: return "f64";
- case MVT::f80: return "f80";
- case MVT::f128: return "f128";
case MVT::ppcf128: return "ppcf128";
case MVT::isVoid: return "isVoid";
case MVT::Other: return "ch";
case MVT::Glue: return "glue";
case MVT::x86mmx: return "x86mmx";
- case MVT::v1i1: return "v1i1";
- case MVT::v2i1: return "v2i1";
- case MVT::v4i1: return "v4i1";
- case MVT::v8i1: return "v8i1";
- case MVT::v16i1: return "v16i1";
- case MVT::v32i1: return "v32i1";
- case MVT::v64i1: return "v64i1";
- case MVT::v128i1: return "v128i1";
- case MVT::v256i1: return "v256i1";
- case MVT::v512i1: return "v512i1";
- case MVT::v1024i1: return "v1024i1";
- case MVT::v1i8: return "v1i8";
- case MVT::v2i8: return "v2i8";
- case MVT::v4i8: return "v4i8";
- case MVT::v8i8: return "v8i8";
- case MVT::v16i8: return "v16i8";
- case MVT::v32i8: return "v32i8";
- case MVT::v64i8: return "v64i8";
- case MVT::v128i8: return "v128i8";
- case MVT::v256i8: return "v256i8";
- case MVT::v1i16: return "v1i16";
- case MVT::v2i16: return "v2i16";
- case MVT::v3i16: return "v3i16";
- case MVT::v4i16: return "v4i16";
- case MVT::v8i16: return "v8i16";
- case MVT::v16i16: return "v16i16";
- case MVT::v32i16: return "v32i16";
- case MVT::v64i16: return "v64i16";
- case MVT::v128i16: return "v128i16";
- case MVT::v1i32: return "v1i32";
- case MVT::v2i32: return "v2i32";
- case MVT::v3i32: return "v3i32";
- case MVT::v4i32: return "v4i32";
- case MVT::v5i32: return "v5i32";
- case MVT::v8i32: return "v8i32";
- case MVT::v16i32: return "v16i32";
- case MVT::v32i32: return "v32i32";
- case MVT::v64i32: return "v64i32";
- case MVT::v128i32: return "v128i32";
- case MVT::v256i32: return "v256i32";
- case MVT::v512i32: return "v512i32";
- case MVT::v1024i32:return "v1024i32";
- case MVT::v2048i32:return "v2048i32";
- case MVT::v1i64: return "v1i64";
- case MVT::v2i64: return "v2i64";
- case MVT::v4i64: return "v4i64";
- case MVT::v8i64: return "v8i64";
- case MVT::v16i64: return "v16i64";
- case MVT::v32i64: return "v32i64";
- case MVT::v1i128: return "v1i128";
- case MVT::v1f32: return "v1f32";
- case MVT::v2f32: return "v2f32";
- case MVT::v2f16: return "v2f16";
- case MVT::v3f16: return "v3f16";
- case MVT::v4f16: return "v4f16";
- case MVT::v8f16: return "v8f16";
- case MVT::v16f16: return "v16f16";
- case MVT::v32f16: return "v32f16";
- case MVT::v3f32: return "v3f32";
- case MVT::v4f32: return "v4f32";
- case MVT::v5f32: return "v5f32";
- case MVT::v8f32: return "v8f32";
- case MVT::v16f32: return "v16f32";
- case MVT::v32f32: return "v32f32";
- case MVT::v64f32: return "v64f32";
- case MVT::v128f32: return "v128f32";
- case MVT::v256f32: return "v256f32";
- case MVT::v512f32: return "v512f32";
- case MVT::v1024f32:return "v1024f32";
- case MVT::v2048f32:return "v2048f32";
- case MVT::v1f64: return "v1f64";
- case MVT::v2f64: return "v2f64";
- case MVT::v4f64: return "v4f64";
- case MVT::v8f64: return "v8f64";
- case MVT::nxv1i1: return "nxv1i1";
- case MVT::nxv2i1: return "nxv2i1";
- case MVT::nxv4i1: return "nxv4i1";
- case MVT::nxv8i1: return "nxv8i1";
- case MVT::nxv16i1: return "nxv16i1";
- case MVT::nxv32i1: return "nxv32i1";
- case MVT::nxv1i8: return "nxv1i8";
- case MVT::nxv2i8: return "nxv2i8";
- case MVT::nxv4i8: return "nxv4i8";
- case MVT::nxv8i8: return "nxv8i8";
- case MVT::nxv16i8: return "nxv16i8";
- case MVT::nxv32i8: return "nxv32i8";
- case MVT::nxv1i16: return "nxv1i16";
- case MVT::nxv2i16: return "nxv2i16";
- case MVT::nxv4i16: return "nxv4i16";
- case MVT::nxv8i16: return "nxv8i16";
- case MVT::nxv16i16:return "nxv16i16";
- case MVT::nxv32i16:return "nxv32i16";
- case MVT::nxv1i32: return "nxv1i32";
- case MVT::nxv2i32: return "nxv2i32";
- case MVT::nxv4i32: return "nxv4i32";
- case MVT::nxv8i32: return "nxv8i32";
- case MVT::nxv16i32:return "nxv16i32";
- case MVT::nxv32i32:return "nxv32i32";
- case MVT::nxv1i64: return "nxv1i64";
- case MVT::nxv2i64: return "nxv2i64";
- case MVT::nxv4i64: return "nxv4i64";
- case MVT::nxv8i64: return "nxv8i64";
- case MVT::nxv16i64:return "nxv16i64";
- case MVT::nxv32i64:return "nxv32i64";
- case MVT::nxv2f16: return "nxv2f16";
- case MVT::nxv4f16: return "nxv4f16";
- case MVT::nxv8f16: return "nxv8f16";
- case MVT::nxv1f32: return "nxv1f32";
- case MVT::nxv2f32: return "nxv2f32";
- case MVT::nxv4f32: return "nxv4f32";
- case MVT::nxv8f32: return "nxv8f32";
- case MVT::nxv16f32:return "nxv16f32";
- case MVT::nxv1f64: return "nxv1f64";
- case MVT::nxv2f64: return "nxv2f64";
- case MVT::nxv4f64: return "nxv4f64";
- case MVT::nxv8f64: return "nxv8f64";
case MVT::Metadata:return "Metadata";
case MVT::Untyped: return "Untyped";
case MVT::exnref : return "exnref";
diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp
index 865a1cfbf43a..1582f12ad580 100644
--- a/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -87,6 +87,8 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp
index cdf79374e974..87958a738c67 100644
--- a/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -20,17 +20,19 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 119c3fd1ec7f..4847a0c3e842 100644
--- a/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"