author    Dimitry Andric <dim@FreeBSD.org>  2017-04-26 19:45:00 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2017-04-26 19:45:00 +0000
commit    12f3ca4cdb95b193af905a00e722a4dcb40b3de3 (patch)
tree      ae1a7fcfc24a8d4b23206c57121c3f361d4b7f84 /lib
parent    d99dafe2e4a385dd2a6c76da6d8258deb100657b (diff)
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp  12
-rw-r--r--  lib/Analysis/ConstantFolding.cpp  17
-rw-r--r--  lib/Analysis/DemandedBits.cpp  31
-rw-r--r--  lib/Analysis/DomPrinter.cpp  16
-rw-r--r--  lib/Analysis/IVUsers.cpp  12
-rw-r--r--  lib/Analysis/InlineCost.cpp  1
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp  535
-rw-r--r--  lib/Analysis/Lint.cpp  14
-rw-r--r--  lib/Analysis/MemorySSAUpdater.cpp  9
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp  265
-rw-r--r--  lib/Analysis/ScalarEvolutionNormalization.cpp  71
-rw-r--r--  lib/Analysis/ValueTracking.cpp  853
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp  71
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.cpp  12
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.h  31
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp  6
-rw-r--r--  lib/CodeGen/AntiDepBreaker.h  19
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp  74
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp  3
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp  18
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h  2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp  19
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.cpp  12
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp  8
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.h  7
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp  6
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelect.cpp  4
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerHelper.cpp  42
-rw-r--r--  lib/CodeGen/GlobalISel/RegisterBank.cpp  2
-rw-r--r--  lib/CodeGen/GlobalISel/RegisterBankInfo.cpp  2
-rw-r--r--  lib/CodeGen/MIRPrinter.cpp  3
-rw-r--r--  lib/CodeGen/MachineInstr.cpp  7
-rw-r--r--  lib/CodeGen/MachineLICM.cpp  10
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp  8
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp  5
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp  4
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp  110
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp  3
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp  4
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp  8
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp  12
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp  24
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp  12
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp  63
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  45
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp  105
-rw-r--r--  lib/CodeGen/StackMaps.cpp  5
-rw-r--r--  lib/CodeGen/TargetInstrInfo.cpp  12
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp  11
-rw-r--r--  lib/CodeGen/TargetRegisterInfo.cpp  16
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp  5
-rw-r--r--  lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp  7
-rw-r--r--  lib/DebugInfo/DWARF/DWARFContext.cpp  19
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugLine.cpp  14
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugLoc.cpp  15
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp  11
-rw-r--r--  lib/DebugInfo/DWARF/DWARFFormValue.cpp  22
-rw-r--r--  lib/DebugInfo/DWARF/DWARFUnit.cpp  16
-rw-r--r--  lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp  5
-rw-r--r--  lib/DebugInfo/PDB/DIA/DIASession.cpp  17
-rw-r--r--  lib/DebugInfo/PDB/Native/ModStream.cpp  4
-rw-r--r--  lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp  3
-rw-r--r--  lib/DebugInfo/PDB/UDTLayout.cpp  355
-rw-r--r--  lib/Fuzzer/CMakeLists.txt  89
-rw-r--r--  lib/Fuzzer/FuzzerDefs.h  18
-rw-r--r--  lib/Fuzzer/FuzzerMerge.h  2
-rw-r--r--  lib/IR/AsmWriter.cpp  45
-rw-r--r--  lib/IR/AttributeImpl.h  15
-rw-r--r--  lib/IR/Attributes.cpp  24
-rw-r--r--  lib/IR/GCOV.cpp  10
-rw-r--r--  lib/IR/Value.cpp  19
-rw-r--r--  lib/LTO/LTO.cpp  3
-rw-r--r--  lib/LTO/LTOBackend.cpp  18
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp  13
-rw-r--r--  lib/MC/WasmObjectWriter.cpp  2
-rw-r--r--  lib/Object/ELF.cpp  13
-rw-r--r--  lib/Object/ELFObjectFile.cpp  21
-rw-r--r--  lib/Object/IRSymtab.cpp  31
-rw-r--r--  lib/Object/MachOObjectFile.cpp  215
-rw-r--r--  lib/Object/ModuleSummaryIndexObjectFile.cpp  27
-rw-r--r--  lib/Object/ModuleSymbolTable.cpp  32
-rw-r--r--  lib/Object/RecordStreamer.cpp  5
-rw-r--r--  lib/Object/RecordStreamer.h  23
-rw-r--r--  lib/Object/WasmObjectFile.cpp  12
-rw-r--r--  lib/ObjectYAML/WasmYAML.cpp  5
-rw-r--r--  lib/Passes/PassBuilder.cpp  4
-rw-r--r--  lib/Support/APFloat.cpp  31
-rw-r--r--  lib/Support/APInt.cpp  233
-rw-r--r--  lib/Support/DynamicLibrary.cpp  1
-rw-r--r--  lib/Support/Triple.cpp  2
-rw-r--r--  lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp  1
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp  19
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp  305
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h  15
-rw-r--r--  lib/Target/AArch64/AArch64InstrAtomics.td  3
-rw-r--r--  lib/Target/AArch64/AArch64InstrFormats.td  4
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp  12
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h  2
-rw-r--r--  lib/Target/AArch64/AArch64InstructionSelector.cpp  41
-rw-r--r--  lib/Target/AArch64/AArch64SchedFalkor.td  10
-rw-r--r--  lib/Target/AArch64/AArch64SchedFalkorDetails.td  31
-rw-r--r--  lib/Target/AArch64/AArch64SchedFalkorWriteRes.td  31
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp  12
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp  7
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp  3
-rw-r--r--  lib/Target/AMDGPU/AMDGPU.td  3
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp  96
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.cpp  6
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.h  5
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstrInfo.td  5
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstructions.td  16
-rw-r--r--  lib/Target/AMDGPU/AMDGPUMCInstLower.cpp  6
-rw-r--r--  lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp  74
-rw-r--r--  lib/Target/AMDGPU/BUFInstructions.td  75
-rw-r--r--  lib/Target/AMDGPU/GCNRegPressure.cpp  4
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp  24
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp  1
-rw-r--r--  lib/Target/AMDGPU/SIFixSGPRCopies.cpp  185
-rw-r--r--  lib/Target/AMDGPU/SIFrameLowering.cpp  120
-rw-r--r--  lib/Target/AMDGPU/SIFrameLowering.h  2
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp  132
-rw-r--r--  lib/Target/AMDGPU/SIInsertWaitcnts.cpp  4
-rw-r--r--  lib/Target/AMDGPU/SIInsertWaits.cpp  10
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.cpp  58
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.h  4
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.td  3
-rw-r--r--  lib/Target/AMDGPU/SIInstructions.td  40
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.cpp  2
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.h  24
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.cpp  62
-rw-r--r--  lib/Target/AMDGPU/SOPInstructions.td  36
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp  9
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp  4
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h  4
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp  3
-rw-r--r--  lib/Target/ARM/ARMCallLowering.cpp  3
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp  1
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp  8
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp  13
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp  4
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.h  4
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td  7
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td  3
-rw-r--r--  lib/Target/ARM/ARMInstructionSelector.cpp  53
-rw-r--r--  lib/Target/ARM/ARMLegalizerInfo.cpp  59
-rw-r--r--  lib/Target/ARM/ARMLegalizerInfo.h  3
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp  4
-rw-r--r--  lib/Target/ARM/ARMRegisterBankInfo.cpp  3
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp  4
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h  4
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp  4
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h  4
-rw-r--r--  lib/Target/AVR/AVRAsmPrinter.cpp  3
-rw-r--r--  lib/Target/AVR/AVRExpandPseudoInsts.cpp  130
-rw-r--r--  lib/Target/AVR/AVRFrameLowering.cpp  4
-rw-r--r--  lib/Target/AVR/AVRInstrInfo.cpp  8
-rw-r--r--  lib/Target/AVR/AVRRegisterInfo.cpp  5
-rw-r--r--  lib/Target/Hexagon/BitTracker.cpp  2
-rw-r--r--  lib/Target/Hexagon/HexagonAsmPrinter.cpp  6
-rw-r--r--  lib/Target/Hexagon/HexagonBitSimplify.cpp  30
-rw-r--r--  lib/Target/Hexagon/HexagonExpandCondsets.cpp  6
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp  43
-rw-r--r--  lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp  8
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td  2
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp  6
-rw-r--r--  lib/Target/Mips/MipsCCState.cpp  17
-rw-r--r--  lib/Target/Mips/MipsCCState.h  14
-rw-r--r--  lib/Target/Mips/MipsFastISel.cpp  6
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.cpp  2
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp  11
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.cpp  25
-rw-r--r--  lib/Target/Mips/MipsOptimizePICCall.cpp  5
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.cpp  22
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.cpp  26
-rw-r--r--  lib/Target/Mips/Relocation.txt  125
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.cpp  2
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp  17
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp  4
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp  4
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h  2
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp  4
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp  12
-rw-r--r--  lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp  3
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp  2
-rw-r--r--  lib/Target/X86/X86.h  3
-rw-r--r--  lib/Target/X86/X86.td  11
-rw-r--r--  lib/Target/X86/X86CallLowering.cpp  28
-rw-r--r--  lib/Target/X86/X86CallLowering.h  5
-rw-r--r--  lib/Target/X86/X86FastISel.cpp  3
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp  28
-rw-r--r--  lib/Target/X86/X86FrameLowering.h  2
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp  24
-rw-r--r--  lib/Target/X86/X86InstrArithmetic.td  10
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp  107
-rw-r--r--  lib/Target/X86/X86InstrInfo.h  2
-rw-r--r--  lib/Target/X86/X86InstrInfo.td  1
-rw-r--r--  lib/Target/X86/X86InstrSSE.td  38
-rw-r--r--  lib/Target/X86/X86InstructionSelector.cpp  33
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp  53
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp  62
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp  1
-rw-r--r--  lib/Target/X86/X86Subtarget.h  4
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp  2
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.cpp  13
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp  4
-rw-r--r--  lib/Target/XCore/XCoreMachineFunctionInfo.cpp  24
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp  51
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp  32
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp  115
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp  331
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp  42
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp  30
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp  67
-rw-r--r--  lib/Transforms/InstCombine/InstCombineInternal.h  22
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp  9
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp  431
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp  43
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp  32
-rw-r--r--  lib/Transforms/Instrumentation/IndirectCallPromotion.cpp  4
-rw-r--r--  lib/Transforms/ObjCARC/PtrState.cpp  6
-rw-r--r--  lib/Transforms/Scalar/ConstantHoisting.cpp  212
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp  30
-rw-r--r--  lib/Transforms/Scalar/GuardWidening.cpp  7
-rw-r--r--  lib/Transforms/Scalar/InferAddressSpaces.cpp  41
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp  30
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp  5
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp  22
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp  2
-rw-r--r--  lib/Transforms/Scalar/StructurizeCFG.cpp  14
-rw-r--r--  lib/Transforms/Utils/BypassSlowDivision.cpp  9
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp  43
-rw-r--r--  lib/Transforms/Utils/Local.cpp  54
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp  14
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp  8
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp  23
-rw-r--r--  lib/Transforms/Utils/SimplifyInstructions.cpp  18
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp  38
-rw-r--r--  lib/Transforms/Vectorize/LoadStoreVectorizer.cpp  32
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp  27
239 files changed, 5090 insertions, 3509 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 3db041cc0fa6..537823020301 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -924,8 +924,8 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
uint64_t V2Size,
const DataLayout &DL) {
- assert(GEP1->getPointerOperand()->stripPointerCasts() ==
- GEP2->getPointerOperand()->stripPointerCasts() &&
+ assert(GEP1->getPointerOperand()->stripPointerCastsAndBarriers() ==
+ GEP2->getPointerOperand()->stripPointerCastsAndBarriers() &&
GEP1->getPointerOperandType() == GEP2->getPointerOperandType() &&
"Expected GEPs with the same pointer operand");
@@ -1184,8 +1184,8 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// If we know the two GEPs are based off of the exact same pointer (and not
// just the same underlying object), see if that tells us anything about
// the resulting pointers.
- if (GEP1->getPointerOperand()->stripPointerCasts() ==
- GEP2->getPointerOperand()->stripPointerCasts() &&
+ if (GEP1->getPointerOperand()->stripPointerCastsAndBarriers() ==
+ GEP2->getPointerOperand()->stripPointerCastsAndBarriers() &&
GEP1->getPointerOperandType() == GEP2->getPointerOperandType()) {
AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
// If we couldn't find anything interesting, don't abandon just yet.
@@ -1500,8 +1500,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
return NoAlias;
// Strip off any casts if they exist.
- V1 = V1->stripPointerCasts();
- V2 = V2->stripPointerCasts();
+ V1 = V1->stripPointerCastsAndBarriers();
+ V2 = V2->stripPointerCastsAndBarriers();
// If V1 or V2 is undef, the result is NoAlias because we can always pick a
// value for undef that aliases nothing in the program.
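The hunks above apply one mechanical substitution: pointer operands are now stripped with stripPointerCastsAndBarriers() instead of stripPointerCasts(), so the alias checks also look through barrier intrinsics when deciding whether two pointers share a base. A minimal sketch of that comparison, assuming the same LLVM headers the file already uses (the helper name below is invented for illustration and is not part of the patch):

#include "llvm/IR/Operator.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Illustrative only: true when both GEPs address the same base pointer once
// pointer casts and barriers are stripped, mirroring the updated assertion
// in aliasSameBasePointerGEPs().
static bool sameStrippedBase(const GEPOperator *GEP1, const GEPOperator *GEP2) {
  return GEP1->getPointerOperand()->stripPointerCastsAndBarriers() ==
         GEP2->getPointerOperand()->stripPointerCastsAndBarriers();
}
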
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 14176dac2104..863fbdba7e67 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cerrno>
@@ -687,21 +688,21 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
if (Opc == Instruction::And) {
unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType());
- APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
- APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
- computeKnownBits(Op0, KnownZero0, KnownOne0, DL);
- computeKnownBits(Op1, KnownZero1, KnownOne1, DL);
- if ((KnownOne1 | KnownZero0).isAllOnesValue()) {
+ KnownBits Known0(BitWidth);
+ KnownBits Known1(BitWidth);
+ computeKnownBits(Op0, Known0, DL);
+ computeKnownBits(Op1, Known1, DL);
+ if ((Known1.One | Known0.Zero).isAllOnesValue()) {
// All the bits of Op0 that the 'and' could be masking are already zero.
return Op0;
}
- if ((KnownOne0 | KnownZero1).isAllOnesValue()) {
+ if ((Known0.One | Known1.Zero).isAllOnesValue()) {
// All the bits of Op1 that the 'and' could be masking are already zero.
return Op1;
}
- APInt KnownZero = KnownZero0 | KnownZero1;
- APInt KnownOne = KnownOne0 & KnownOne1;
+ APInt KnownZero = Known0.Zero | Known1.Zero;
+ APInt KnownOne = Known0.One & Known1.One;
if ((KnownZero | KnownOne).isAllOnesValue()) {
return ConstantInt::get(Op0->getType(), KnownOne);
}
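This hunk, like the later ones in DemandedBits.cpp and InstructionSimplify.cpp, migrates computeKnownBits() callers from a pair of APInt out-parameters (KnownZero/KnownOne) to the single KnownBits struct from llvm/Support/KnownBits.h. A minimal sketch of the new pattern, using only the members visible in these hunks (the helper names are invented for illustration):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Illustrative only: the known-zero and known-one masks now travel together.
static bool allBitsKnown(const KnownBits &Known) {
  // A bit is "known" when it appears in either mask.
  return (Known.Zero | Known.One).isAllOnesValue();
}

static KnownBits freshKnown(unsigned BitWidth) {
  // Equivalent to the old pair of APInt(BitWidth, 0): nothing known yet.
  return KnownBits(BitWidth);
}
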
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index 151c0b0e6c93..285339deaaf5 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -72,8 +73,7 @@ static bool isAlwaysLive(Instruction *I) {
void DemandedBits::determineLiveOperandBits(
const Instruction *UserI, const Instruction *I, unsigned OperandNo,
- const APInt &AOut, APInt &AB, APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2) {
+ const APInt &AOut, APInt &AB, KnownBits &Known, KnownBits &Known2) {
unsigned BitWidth = AB.getBitWidth();
// We're called once per operand, but for some instructions, we need to
@@ -85,15 +85,13 @@ void DemandedBits::determineLiveOperandBits(
auto ComputeKnownBits =
[&](unsigned BitWidth, const Value *V1, const Value *V2) {
const DataLayout &DL = I->getModule()->getDataLayout();
- KnownZero = APInt(BitWidth, 0);
- KnownOne = APInt(BitWidth, 0);
- computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
+ Known = KnownBits(BitWidth);
+ computeKnownBits(const_cast<Value *>(V1), Known, DL, 0,
&AC, UserI, &DT);
if (V2) {
- KnownZero2 = APInt(BitWidth, 0);
- KnownOne2 = APInt(BitWidth, 0);
- computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
+ Known2 = KnownBits(BitWidth);
+ computeKnownBits(const_cast<Value *>(V2), Known2, DL,
0, &AC, UserI, &DT);
}
};
@@ -120,7 +118,7 @@ void DemandedBits::determineLiveOperandBits(
// known to be one.
ComputeKnownBits(BitWidth, I, nullptr);
AB = APInt::getHighBitsSet(BitWidth,
- std::min(BitWidth, KnownOne.countLeadingZeros()+1));
+ std::min(BitWidth, Known.One.countLeadingZeros()+1));
}
break;
case Intrinsic::cttz:
@@ -130,7 +128,7 @@ void DemandedBits::determineLiveOperandBits(
// known to be one.
ComputeKnownBits(BitWidth, I, nullptr);
AB = APInt::getLowBitsSet(BitWidth,
- std::min(BitWidth, KnownOne.countTrailingZeros()+1));
+ std::min(BitWidth, Known.One.countTrailingZeros()+1));
}
break;
}
@@ -200,11 +198,11 @@ void DemandedBits::determineLiveOperandBits(
// dead).
if (OperandNo == 0) {
ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
- AB &= ~KnownZero2;
+ AB &= ~Known2.Zero;
} else {
if (!isa<Instruction>(UserI->getOperand(0)))
ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
- AB &= ~(KnownZero & ~KnownZero2);
+ AB &= ~(Known.Zero & ~Known2.Zero);
}
break;
case Instruction::Or:
@@ -216,11 +214,11 @@ void DemandedBits::determineLiveOperandBits(
// dead).
if (OperandNo == 0) {
ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
- AB &= ~KnownOne2;
+ AB &= ~Known2.One;
} else {
if (!isa<Instruction>(UserI->getOperand(0)))
ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
- AB &= ~(KnownOne & ~KnownOne2);
+ AB &= ~(Known.One & ~Known2.One);
}
break;
case Instruction::Xor:
@@ -318,7 +316,7 @@ void DemandedBits::performAnalysis() {
if (!UserI->getType()->isIntegerTy())
Visited.insert(UserI);
- APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
+ KnownBits Known, Known2;
// Compute the set of alive bits for each operand. These are anded into the
// existing set, if any, and if that changes the set of alive bits, the
// operand is added to the work-list.
@@ -335,8 +333,7 @@ void DemandedBits::performAnalysis() {
// Bits of each operand that are used to compute alive bits of the
// output are alive, all others are dead.
determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB,
- KnownZero, KnownOne,
- KnownZero2, KnownOne2);
+ Known, Known2);
}
// If we've added to the set of alive bits (or the operand has not
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index 7acfb41500d4..8abc0e7d0df9 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -80,6 +80,22 @@ struct DOTGraphTraits<PostDominatorTree*>
};
}
+void DominatorTree::viewGraph(const Twine &Name, const Twine &Title) {
+#ifndef NDEBUG
+ ViewGraph(this, Name, false, Title);
+#else
+ errs() << "DomTree dump not available, build with DEBUG\n";
+#endif // NDEBUG
+}
+
+void DominatorTree::viewGraph() {
+#ifndef NDEBUG
+ this->viewGraph("domtree", "Dominator Tree for function");
+#else
+ errs() << "DomTree dump not available, build with DEBUG\n";
+#endif // NDEBUG
+}
+
namespace {
struct DominatorTreeWrapperPassAnalysisGraphTraits {
static DominatorTree *getGraph(DominatorTreeWrapperPass *DTWP) {
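The DominatorTree::viewGraph() overloads added above are debugging aids: in builds without NDEBUG they hand the tree to ViewGraph(), which renders it with Graphviz, and otherwise they print a note to errs(). A hypothetical usage sketch, assuming the matching declarations land in llvm/IR/Dominators.h (the helper below is not part of the patch):

#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Illustrative only: build and display the dominator tree of a function,
// e.g. from a debugger session while stepping through a pass.
static void viewDomTreeOf(Function &F) {
  DominatorTree DT(F);
  DT.viewGraph("domtree", "Dominator Tree for function");
}
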
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index fde805a5fde5..c30feb973e60 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -253,18 +253,8 @@ bool IVUsers::AddUsersImpl(Instruction *I,
const SCEV *OriginalISE = ISE;
auto NormalizePred = [&](const SCEVAddRecExpr *AR) {
- // We only allow affine AddRecs to be normalized, otherwise we would not
- // be able to correctly denormalize.
- // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2}
- // Normalized form: {-2,+,1,+,2}
- // Denormalized form: {1,+,3,+,2}
- //
- // However, denormalization would use a different step expression than
- // normalization (see getPostIncExpr), generating the wrong final
- // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
auto *L = AR->getLoop();
- bool Result =
- AR->isAffine() && IVUseShouldUsePostIncValue(User, I, L, DT);
+ bool Result = IVUseShouldUsePostIncValue(User, I, L, DT);
if (Result)
NewUse.PostIncLoops.insert(L);
return Result;
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 1f8dec2aed80..788f908bafca 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -1557,7 +1557,6 @@ InlineParams llvm::getInlineParams(int Threshold) {
Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
// Set the OptMinSizeThreshold and OptSizeThreshold params only if the
- // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
// -inlinehint-threshold commandline option is not explicitly given. If that
// option is present, then its value applies even for callees with size and
// minsize attributes.
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 2259fbaeb982..e720e3ebecdb 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/KnownBits.h"
#include <algorithm>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -46,34 +47,19 @@ enum { RecursionLimit = 3 };
STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumReassoc, "Number of reassociations");
-namespace {
-struct Query {
- const DataLayout &DL;
- const TargetLibraryInfo *TLI;
- const DominatorTree *DT;
- AssumptionCache *AC;
- const Instruction *CxtI;
-
- Query(const DataLayout &DL, const TargetLibraryInfo *tli,
- const DominatorTree *dt, AssumptionCache *ac = nullptr,
- const Instruction *cxti = nullptr)
- : DL(DL), TLI(tli), DT(dt), AC(ac), CxtI(cxti) {}
-};
-} // end anonymous namespace
-
-static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
-static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
+static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
- const Query &, unsigned);
-static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
+ const SimplifyQuery &, unsigned);
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse);
-static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
-static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
+ const SimplifyQuery &Q, unsigned MaxRecurse);
+static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned);
static Value *SimplifyCastInst(unsigned, Value *, Type *,
- const Query &, unsigned);
+ const SimplifyQuery &, unsigned);
/// For a boolean type or a vector of boolean type, return false or a vector
/// with every element false.
@@ -138,7 +124,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
/// Returns the simplified value, or null if no simplification was performed.
static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
- Instruction::BinaryOps OpcodeToExpand, const Query &Q,
+ Instruction::BinaryOps OpcodeToExpand, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -196,7 +182,7 @@ static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
/// Generic simplifications for associative binary operations.
/// Returns the simpler value, or null if none was found.
static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
- Value *LHS, Value *RHS, const Query &Q,
+ Value *LHS, Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
@@ -295,7 +281,7 @@ static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
/// of the select results in the same value. Returns the common value if so,
/// otherwise returns null.
static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
- Value *RHS, const Query &Q,
+ Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -367,7 +353,7 @@ static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
/// comparison by seeing whether both branches of the select result in the same
/// value. Returns the common value if so, otherwise returns null.
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const Query &Q,
+ Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -449,7 +435,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
/// phi values yields the same result for every value. If so returns the common
/// value, otherwise returns null.
static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
- Value *RHS, const Query &Q,
+ Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -492,7 +478,7 @@ static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
/// yields the same result every time. If so returns the common result,
/// otherwise returns null.
static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
@@ -527,7 +513,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode,
Value *&Op0, Value *&Op1,
- const Query &Q) {
+ const SimplifyQuery &Q) {
if (auto *CLHS = dyn_cast<Constant>(Op0)) {
if (auto *CRHS = dyn_cast<Constant>(Op1))
return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL);
@@ -542,7 +528,7 @@ static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode,
/// Given operands for an Add, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q))
return C;
@@ -601,10 +587,15 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const SimplifyQuery &Query) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query, RecursionLimit);
+}
+
/// \brief Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
@@ -679,7 +670,7 @@ static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
/// Given operands for a Sub, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q))
return C;
@@ -703,10 +694,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return Op0;
unsigned BitWidth = Op1->getType()->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (KnownZero.isMaxSignedValue()) {
+ KnownBits Known(BitWidth);
+ computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (Known.Zero.isMaxSignedValue()) {
// Op1 is either 0 or the minimum signed value. If the sub is NSW, then
// Op1 must be 0 because negating the minimum signed value is undefined.
if (isNSW)
@@ -813,14 +803,19 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const SimplifyQuery &Q) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
+}
+
/// Given operands for an FAdd, see if we can fold the result. If not, this
/// returns null.
static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
return C;
@@ -854,7 +849,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
/// Given operands for an FSub, see if we can fold the result. If not, this
/// returns null.
static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
return C;
@@ -886,7 +881,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
/// Given the operands for an FMul, see if we can fold the result
static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
return C;
@@ -903,7 +898,7 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
/// Given operands for a Mul, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q))
return C;
@@ -963,34 +958,52 @@ Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFAddInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyFAddInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFSubInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyFSubInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFMulInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyFMulInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyMulInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ return ::SimplifyMulInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
+}
+
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit);
}
/// Check for common or similar folds of integer division or integer remainder.
@@ -1047,7 +1060,7 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) {
/// Given operands for an SDiv or UDiv, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
return C;
@@ -1103,7 +1116,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// Given operands for an SDiv, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1115,13 +1128,16 @@ Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifySDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ return ::SimplifySDivInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
+}
+
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for a UDiv, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1143,12 +1159,15 @@ Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyUDivInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ return ::SimplifyUDivInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
+}
+
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit);
}
static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const Query &Q, unsigned) {
+ const SimplifyQuery &Q, unsigned) {
if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
return C;
@@ -1193,14 +1212,19 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFDivInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyFDivInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
/// Given operands for an SRem or URem, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
return C;
@@ -1231,7 +1255,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// Given operands for an SRem, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1243,13 +1267,16 @@ Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifySRemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ return ::SimplifySRemInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
+}
+
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for a URem, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1271,12 +1298,15 @@ Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyURemInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ return ::SimplifyURemInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
+}
+
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit);
}
static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const Query &Q, unsigned) {
+ const SimplifyQuery &Q, unsigned) {
if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
return C;
@@ -1302,10 +1332,15 @@ Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFRemInst(Op0, Op1, FMF, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyFRemInst(Op0, Op1, FMF, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const SimplifyQuery &Q) {
+ return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit);
+}
+
/// Returns true if a shift by \c Amount always yields undef.
static bool isUndefShift(Value *Amount) {
Constant *C = dyn_cast<Constant>(Amount);
@@ -1336,7 +1371,7 @@ static bool isUndefShift(Value *Amount) {
/// Given operands for an Shl, LShr or AShr, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
- Value *Op1, const Query &Q, unsigned MaxRecurse) {
+ Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
return C;
@@ -1367,17 +1402,15 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
// If any bits in the shift amount make that value greater than or equal to
// the number of bits in the type, the shift is undefined.
unsigned BitWidth = Op1->getType()->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (KnownOne.getLimitedValue() >= BitWidth)
+ KnownBits Known(BitWidth);
+ computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (Known.One.getLimitedValue() >= BitWidth)
return UndefValue::get(Op0->getType());
// If all valid bits in the shift amount are known zero, the first operand is
// unchanged.
unsigned NumValidShiftBits = Log2_32_Ceil(BitWidth);
- APInt ShiftAmountMask = APInt::getLowBitsSet(BitWidth, NumValidShiftBits);
- if ((KnownZero & ShiftAmountMask) == ShiftAmountMask)
+ if (Known.Zero.countTrailingOnes() >= NumValidShiftBits)
return Op0;
return nullptr;
@@ -1386,7 +1419,7 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
/// \brief Given operands for an Shl, LShr or AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
- Value *Op1, bool isExact, const Query &Q,
+ Value *Op1, bool isExact, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1403,11 +1436,9 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
// The low bit cannot be shifted out of an exact shift if it is set.
if (isExact) {
unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
- APInt Op0KnownZero(BitWidth, 0);
- APInt Op0KnownOne(BitWidth, 0);
- computeKnownBits(Op0, Op0KnownZero, Op0KnownOne, Q.DL, /*Depth=*/0, Q.AC,
- Q.CxtI, Q.DT);
- if (Op0KnownOne[0])
+ KnownBits Op0Known(BitWidth);
+ computeKnownBits(Op0, Op0Known, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
+ if (Op0Known.One[0])
return Op0;
}
@@ -1417,7 +1448,7 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
/// Given operands for an Shl, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1437,14 +1468,19 @@ Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+ const SimplifyQuery &Q) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
+}
+
/// Given operands for an LShr, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q,
MaxRecurse))
return V;
@@ -1462,14 +1498,19 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyLShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyLShrInst(Op0, Op1, isExact, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+ const SimplifyQuery &Q) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit);
+}
+
/// Given operands for an AShr, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q,
MaxRecurse))
return V;
@@ -1496,10 +1537,15 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyAShrInst(Op0, Op1, isExact, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyAShrInst(Op0, Op1, isExact, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+ const SimplifyQuery &Q) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit);
+}
+
static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
ICmpInst *UnsignedICmp, bool IsAnd) {
Value *X, *Y;
@@ -1575,6 +1621,7 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
return X;
+ // FIXME: This should be shared with or-of-icmps.
// Look for this pattern: (icmp V, C0) & (icmp V, C1)).
Type *ITy = Op0->getType();
ICmpInst::Predicate Pred0, Pred1;
@@ -1584,10 +1631,16 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
match(Op1, m_ICmp(Pred1, m_Specific(V), m_APInt(C1)))) {
// Make a constant range that's the intersection of the two icmp ranges.
// If the intersection is empty, we know that the result is false.
- auto Range0 = ConstantRange::makeAllowedICmpRegion(Pred0, *C0);
- auto Range1 = ConstantRange::makeAllowedICmpRegion(Pred1, *C1);
+ auto Range0 = ConstantRange::makeExactICmpRegion(Pred0, *C0);
+ auto Range1 = ConstantRange::makeExactICmpRegion(Pred1, *C1);
if (Range0.intersectWith(Range1).isEmptySet())
return getFalse(ITy);
+
+ // If a range is a superset of the other, the smaller set is all we need.
+ if (Range0.contains(Range1))
+ return Op1;
+ if (Range1.contains(Range0))
+ return Op0;
}
// (icmp (add V, C0), C1) & (icmp V, C0)
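The lines added in the hunk above rest on two ConstantRange facts: makeExactICmpRegion(Pred, C) is the exact set of values satisfying icmp Pred X, C, and when one region contains the other, the conjunction of the two compares collapses to the tighter one. A small illustrative sketch on concrete constants (not part of the patch):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Illustrative only: for (X u< 8) & (X u< 16), the first region [0, 8) is
// contained in the second [0, 16), so the 'and' simplifies to the first
// compare alone.
static bool secondCompareIsRedundant() {
  APInt C0(32, 8), C1(32, 16);
  ConstantRange R0 = ConstantRange::makeExactICmpRegion(CmpInst::ICMP_ULT, C0);
  ConstantRange R1 = ConstantRange::makeExactICmpRegion(CmpInst::ICMP_ULT, C1);
  return R1.contains(R0) && !R0.intersectWith(R1).isEmptySet();
}
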
@@ -1633,7 +1686,7 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
/// Given operands for an And, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q))
return C;
@@ -1744,8 +1797,11 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyAndInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ return ::SimplifyAndInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
+}
+
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit);
}
/// Commuted variants are assumed to be handled by calling this function again
@@ -1830,7 +1886,7 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
/// Given operands for an Or, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q))
return C;
@@ -1877,6 +1933,25 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
(A == Op0 || B == Op0))
return Constant::getAllOnesValue(Op0->getType());
+ // (A & ~B) | (A ^ B) -> (A ^ B)
+ // (~B & A) | (A ^ B) -> (A ^ B)
+ // (A & ~B) | (B ^ A) -> (B ^ A)
+ // (~B & A) | (B ^ A) -> (B ^ A)
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ (match(Op0, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) ||
+ match(Op0, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))))
+ return Op1;
+
+ // Commute the 'or' operands.
+ // (A ^ B) | (A & ~B) -> (A ^ B)
+ // (A ^ B) | (~B & A) -> (A ^ B)
+ // (B ^ A) | (A & ~B) -> (B ^ A)
+ // (B ^ A) | (~B & A) -> (B ^ A)
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ (match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) ||
+ match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))))
+ return Op0;
+
if (auto *ICILHS = dyn_cast<ICmpInst>(Op0)) {
if (auto *ICIRHS = dyn_cast<ICmpInst>(Op1)) {
if (Value *V = SimplifyOrOfICmps(ICILHS, ICIRHS))
@@ -1952,13 +2027,16 @@ Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyOrInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ return ::SimplifyOrInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
+}
+
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for a Xor, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q))
return C;
@@ -2001,10 +2079,14 @@ Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyXorInst(Op0, Op1, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+ return ::SimplifyXorInst(Op0, Op1, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
+}
+
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
+ return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit);
}
+
static Type *GetCompareTy(Value *Op) {
return CmpInst::makeCmpResultType(Op->getType());
}
@@ -2238,7 +2320,7 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
/// Fold an icmp when its operands have i1 scalar type.
static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const Query &Q) {
+ Value *RHS, const SimplifyQuery &Q) {
Type *ITy = GetCompareTy(LHS); // The return type.
Type *OpTy = LHS->getType(); // The operand type.
if (!OpTy->getScalarType()->isIntegerTy(1))
@@ -2301,7 +2383,7 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
/// Try hard to fold icmp with zero RHS because this is a common case.
static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const Query &Q) {
+ Value *RHS, const SimplifyQuery &Q) {
if (!match(RHS, m_Zero()))
return nullptr;
@@ -2556,7 +2638,7 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
}
static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const Query &Q,
+ Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
Type *ITy = GetCompareTy(LHS); // The return type.
@@ -2866,7 +2948,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
/// Simplify integer comparisons where at least one operand of the compare
/// matches an integer min/max idiom.
static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const Query &Q,
+ Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
Type *ITy = GetCompareTy(LHS); // The return type.
Value *A, *B;
@@ -3070,7 +3152,7 @@ static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
/// Given operands for an ICmpInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
@@ -3342,11 +3424,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const APInt *RHSVal;
if (match(RHS, m_APInt(RHSVal))) {
unsigned BitWidth = RHSVal->getBitWidth();
- APInt LHSKnownZero(BitWidth, 0);
- APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC,
- Q.CxtI, Q.DT);
- if (((LHSKnownZero & *RHSVal) != 0) || ((LHSKnownOne & ~(*RHSVal)) != 0))
+ KnownBits LHSKnown(BitWidth);
+ computeKnownBits(LHS, LHSKnown, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
+ if (LHSKnown.Zero.intersects(*RHSVal) ||
+ !LHSKnown.One.isSubsetOf(*RHSVal))
return Pred == ICmpInst::ICMP_EQ ? ConstantInt::getFalse(ITy)
: ConstantInt::getTrue(ITy);
}
@@ -3372,14 +3453,19 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const SimplifyQuery &Q) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
+}
+
/// Given operands for an FCmpInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- FastMathFlags FMF, const Query &Q,
+ FastMathFlags FMF, const SimplifyQuery &Q,
unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
@@ -3505,13 +3591,18 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF,
- Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, {DL, TLI, DT, AC, CxtI},
+ RecursionLimit);
+}
+
+Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ FastMathFlags FMF, const SimplifyQuery &Q) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit);
}
/// See if V simplifies when its operand Op is replaced with RepOp.
static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const Query &Q,
+ const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Trivial replacement.
if (V == Op)
@@ -3659,7 +3750,7 @@ static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *TrueVal,
/// Try to simplify a select instruction when its condition operand is an
/// integer comparison.
static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
- Value *FalseVal, const Query &Q,
+ Value *FalseVal, const SimplifyQuery &Q,
unsigned MaxRecurse) {
ICmpInst::Predicate Pred;
Value *CmpLHS, *CmpRHS;
@@ -3738,7 +3829,7 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
- Value *FalseVal, const Query &Q,
+ Value *FalseVal, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// select true, X, Y -> X
// select false, X, Y -> Y
@@ -3775,14 +3866,19 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifySelectInst(Cond, TrueVal, FalseVal,
- Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+ return ::SimplifySelectInst(Cond, TrueVal, FalseVal, {DL, TLI, DT, AC, CxtI},
+ RecursionLimit);
+}
+
+Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+ const SimplifyQuery &Q) {
+ return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit);
}
/// Given operands for an GetElementPtrInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
- const Query &Q, unsigned) {
+ const SimplifyQuery &Q, unsigned) {
// The type of the GEP pointer operand.
unsigned AS =
cast<PointerType>(Ops[0]->getType()->getScalarType())->getAddressSpace();
@@ -3896,14 +3992,18 @@ Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyGEPInst(SrcTy, Ops,
- Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+ return ::SimplifyGEPInst(SrcTy, Ops, {DL, TLI, DT, AC, CxtI}, RecursionLimit);
+}
+
+Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+ const SimplifyQuery &Q) {
+ return ::SimplifyGEPInst(SrcTy, Ops, Q, RecursionLimit);
}
/// Given operands for an InsertValueInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs, const Query &Q,
+ ArrayRef<unsigned> Idxs, const SimplifyQuery &Q,
unsigned) {
if (Constant *CAgg = dyn_cast<Constant>(Agg))
if (Constant *CVal = dyn_cast<Constant>(Val))
@@ -3933,14 +4033,20 @@ Value *llvm::SimplifyInsertValueInst(
Value *Agg, Value *Val, ArrayRef<unsigned> Idxs, const DataLayout &DL,
const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyInsertValueInst(Agg, Val, Idxs, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const SimplifyQuery &Q) {
+ return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit);
+}
+
/// Given operands for an ExtractValueInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
- const Query &, unsigned) {
+ const SimplifyQuery &, unsigned) {
if (auto *CAgg = dyn_cast<Constant>(Agg))
return ConstantFoldExtractValueInstruction(CAgg, Idxs);
@@ -3968,13 +4074,18 @@ Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
const DominatorTree *DT,
AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyExtractValueInst(Agg, Idxs, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
+ const SimplifyQuery &Q) {
+ return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit);
+}
+
/// Given operands for an ExtractElementInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &,
+static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &,
unsigned) {
if (auto *CVec = dyn_cast<Constant>(Vec)) {
if (auto *CIdx = dyn_cast<Constant>(Idx))
@@ -4000,12 +4111,17 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &,
Value *llvm::SimplifyExtractElementInst(
Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) {
- return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyExtractElementInst(Vec, Idx, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx,
+ const SimplifyQuery &Q) {
+ return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit);
+}
+
/// See if we can fold the given phi. If not, returns null.
-static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
+static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) {
// If all of the PHI's incoming values are the same then replace the PHI node
// with the common value.
Value *CommonValue = nullptr;
@@ -4038,7 +4154,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
}
static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
- Type *Ty, const Query &Q, unsigned MaxRecurse) {
+ Type *Ty, const SimplifyQuery &Q, unsigned MaxRecurse) {
if (auto *C = dyn_cast<Constant>(Op))
return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL);
@@ -4076,10 +4192,15 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyCastInst(CastOpc, Op, Ty, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyCastInst(CastOpc, Op, Ty, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
+ const SimplifyQuery &Q) {
+ return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit);
+}
+
/// For the given destination element of a shuffle, peek through shuffles to
/// match a root vector source operand that contains that element in the same
/// vector lane (ie, the same mask index), so we can eliminate the shuffle(s).
@@ -4135,7 +4256,7 @@ static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
}
static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
- Type *RetTy, const Query &Q,
+ Type *RetTy, const SimplifyQuery &Q,
unsigned MaxRecurse) {
Type *InVecTy = Op0->getType();
unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
@@ -4207,8 +4328,13 @@ Value *llvm::SimplifyShuffleVectorInst(
Value *Op0, Value *Op1, Constant *Mask, Type *RetTy,
const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT,
AssumptionCache *AC, const Instruction *CxtI) {
- return ::SimplifyShuffleVectorInst(
- Op0, Op1, Mask, RetTy, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+ return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy,
+ {DL, TLI, DT, AC, CxtI}, RecursionLimit);
+}
+
+Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
+ Type *RetTy, const SimplifyQuery &Q) {
+ return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
}
//=== Helper functions for higher up the class hierarchy.
@@ -4216,7 +4342,7 @@ Value *llvm::SimplifyShuffleVectorInst(
/// Given operands for a BinaryOperator, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::Add:
return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse);
@@ -4264,7 +4390,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// In contrast to SimplifyBinOp, try to use FastMathFlags when folding the
/// result. In case we don't need FastMathFlags, simply fall back to SimplifyBinOp.
static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const FastMathFlags &FMF, const Query &Q,
+ const FastMathFlags &FMF, const SimplifyQuery &Q,
unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::FAdd:
@@ -4284,22 +4410,32 @@ Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyBinOp(Opcode, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyBinOp(Opcode, LHS, RHS, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const SimplifyQuery &Q) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit);
+}
+
Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const FastMathFlags &FMF, const DataLayout &DL,
+ FastMathFlags FMF, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ FastMathFlags FMF, const SimplifyQuery &Q) {
+ return ::SimplifyFPBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
+}
+
/// Given operands for a CmpInst, see if we can fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse);
@@ -4309,10 +4445,15 @@ Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const SimplifyQuery &Q) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
+}
+
static bool IsIdempotent(Intrinsic::ID ID) {
switch (ID) {
default: return false;
@@ -4403,7 +4544,7 @@ static bool maskIsAllZeroOrUndef(Value *Mask) {
template <typename IterTy>
static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
Intrinsic::ID IID = F->getIntrinsicID();
unsigned NumOperands = std::distance(ArgBegin, ArgEnd);
@@ -4497,7 +4638,7 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
template <typename IterTy>
static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
- const Query &Q, unsigned MaxRecurse) {
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
Type *Ty = V->getType();
if (PointerType *PTy = dyn_cast<PointerType>(Ty))
Ty = PTy->getElementType();
@@ -4535,16 +4676,26 @@ Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
User::op_iterator ArgEnd, const DataLayout &DL,
const TargetLibraryInfo *TLI, const DominatorTree *DT,
AssumptionCache *AC, const Instruction *CxtI) {
- return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT, AC, CxtI),
+ return ::SimplifyCall(V, ArgBegin, ArgEnd, {DL, TLI, DT, AC, CxtI},
RecursionLimit);
}
+Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
+ User::op_iterator ArgEnd, const SimplifyQuery &Q) {
+ return ::SimplifyCall(V, ArgBegin, ArgEnd, Q, RecursionLimit);
+}
+
Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
const DataLayout &DL, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
const Instruction *CxtI) {
- return ::SimplifyCall(V, Args.begin(), Args.end(),
- Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+ return ::SimplifyCall(V, Args.begin(), Args.end(), {DL, TLI, DT, AC, CxtI},
+ RecursionLimit);
+}
+
+Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
+ const SimplifyQuery &Q) {
+ return ::SimplifyCall(V, Args.begin(), Args.end(), Q, RecursionLimit);
}
/// See if we can compute a simplified version of this instruction.
@@ -4553,152 +4704,141 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
OptimizationRemarkEmitter *ORE) {
+ return SimplifyInstruction(I, {DL, TLI, DT, AC, I}, ORE);
+}
+
+Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &Q,
+ OptimizationRemarkEmitter *ORE) {
Value *Result;
switch (I->getOpcode()) {
default:
- Result = ConstantFoldInstruction(I, DL, TLI);
+ Result = ConstantFoldInstruction(I, Q.DL, Q.TLI);
break;
case Instruction::FAdd:
Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ I->getFastMathFlags(), Q);
break;
case Instruction::Add:
Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
- TLI, DT, AC, I);
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
break;
case Instruction::FSub:
Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ I->getFastMathFlags(), Q);
break;
case Instruction::Sub:
Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
- TLI, DT, AC, I);
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
break;
case Instruction::FMul:
Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ I->getFastMathFlags(), Q);
break;
case Instruction::Mul:
- Result =
- SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
+ Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::SDiv:
- Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AC, I);
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::UDiv:
- Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AC, I);
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::FDiv:
Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ I->getFastMathFlags(), Q);
break;
case Instruction::SRem:
- Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AC, I);
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::URem:
- Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT,
- AC, I);
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::FRem:
Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ I->getFastMathFlags(), Q);
break;
case Instruction::Shl:
Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
- cast<BinaryOperator>(I)->hasNoUnsignedWrap(), DL,
- TLI, DT, AC, I);
+ cast<BinaryOperator>(I)->hasNoUnsignedWrap(), Q);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(), DL, TLI, DT,
- AC, I);
+ cast<BinaryOperator>(I)->isExact(), Q);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
- cast<BinaryOperator>(I)->isExact(), DL, TLI, DT,
- AC, I);
+ cast<BinaryOperator>(I)->isExact(), Q);
break;
case Instruction::And:
- Result =
- SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
+ Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::Or:
- Result =
- SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
+ Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::Xor:
- Result =
- SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT, AC, I);
+ Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::ICmp:
- Result =
- SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), I->getOperand(0),
- I->getOperand(1), DL, TLI, DT, AC, I);
+ Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), Q);
break;
case Instruction::FCmp:
- Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1),
- I->getFastMathFlags(), DL, TLI, DT, AC, I);
+ Result =
+ SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0),
+ I->getOperand(1), I->getFastMathFlags(), Q);
break;
case Instruction::Select:
Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
- I->getOperand(2), DL, TLI, DT, AC, I);
+ I->getOperand(2), Q);
break;
case Instruction::GetElementPtr: {
- SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
+ SmallVector<Value *, 8> Ops(I->op_begin(), I->op_end());
Result = SimplifyGEPInst(cast<GetElementPtrInst>(I)->getSourceElementType(),
- Ops, DL, TLI, DT, AC, I);
+ Ops, Q);
break;
}
case Instruction::InsertValue: {
InsertValueInst *IV = cast<InsertValueInst>(I);
Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
IV->getInsertedValueOperand(),
- IV->getIndices(), DL, TLI, DT, AC, I);
+ IV->getIndices(), Q);
break;
}
case Instruction::ExtractValue: {
auto *EVI = cast<ExtractValueInst>(I);
Result = SimplifyExtractValueInst(EVI->getAggregateOperand(),
- EVI->getIndices(), DL, TLI, DT, AC, I);
+ EVI->getIndices(), Q);
break;
}
case Instruction::ExtractElement: {
auto *EEI = cast<ExtractElementInst>(I);
- Result = SimplifyExtractElementInst(
- EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I);
+ Result = SimplifyExtractElementInst(EEI->getVectorOperand(),
+ EEI->getIndexOperand(), Q);
break;
}
case Instruction::ShuffleVector: {
auto *SVI = cast<ShuffleVectorInst>(I);
Result = SimplifyShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
- SVI->getMask(), SVI->getType(), DL, TLI,
- DT, AC, I);
+ SVI->getMask(), SVI->getType(), Q);
break;
}
case Instruction::PHI:
- Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I));
+ Result = SimplifyPHINode(cast<PHINode>(I), Q);
break;
case Instruction::Call: {
CallSite CS(cast<CallInst>(I));
- Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), DL,
- TLI, DT, AC, I);
+ Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), Q);
break;
}
#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
#include "llvm/IR/Instruction.def"
#undef HANDLE_CAST_INST
- Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(),
- DL, TLI, DT, AC, I);
+ Result =
+ SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(), Q);
break;
case Instruction::Alloca:
// No simplifications for Alloca and it can't be constant folded.
@@ -4710,11 +4850,10 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
// value even when the operands are not all constants.
if (!Result && I->getType()->isIntOrIntVectorTy()) {
unsigned BitWidth = I->getType()->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT, ORE);
- if ((KnownZero | KnownOne).isAllOnesValue())
- Result = ConstantInt::get(I->getType(), KnownOne);
+ KnownBits Known(BitWidth);
+ computeKnownBits(I, Known, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE);
+ if ((Known.Zero | Known.One).isAllOnesValue())
+ Result = ConstantInt::get(I->getType(), Known.One);
}
/// If called on unreachable code, the above logic may report that the
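For orientation: the InstructionSimplify changes above give every legacy entry point, which threads DL, TLI, DT, AC and CxtI individually, a sibling overload that takes a single SimplifyQuery. A minimal caller-side sketch, assuming only what the diff shows (the {DL, TLI, DT, AC, CxtI} brace-initialization order and the new SimplifyQuery overloads); the helper name is made up:

  #include "llvm/Analysis/InstructionSimplify.h"
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  static Value *trySimplify(Instruction *I, const DataLayout &DL,
                            const TargetLibraryInfo *TLI,
                            const DominatorTree *DT, AssumptionCache *AC) {
    // One query object replaces the five separate analysis parameters.
    const SimplifyQuery Q = {DL, TLI, DT, AC, /*CxtI=*/I};
    return SimplifyInstruction(I, Q, /*ORE=*/nullptr);
  }

The same query object can be reused across the other new overloads (SimplifyBinOp, SimplifyICmpInst, SimplifySelectInst, and so on), which is what the per-opcode switch in SimplifyInstruction now does.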
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 2ca46b1fe872..0f04af54cdc7 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -70,6 +70,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -534,10 +535,9 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
VectorType *VecTy = dyn_cast<VectorType>(V->getType());
if (!VecTy) {
unsigned BitWidth = V->getType()->getIntegerBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC,
- dyn_cast<Instruction>(V), DT);
- return KnownZero.isAllOnesValue();
+ KnownBits Known(BitWidth);
+ computeKnownBits(V, Known, DL, 0, AC, dyn_cast<Instruction>(V), DT);
+ return Known.Zero.isAllOnesValue();
}
// Per-component check doesn't work with zeroinitializer
@@ -556,9 +556,9 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
if (isa<UndefValue>(Elem))
return true;
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(Elem, KnownZero, KnownOne, DL);
- if (KnownZero.isAllOnesValue())
+ KnownBits Known(BitWidth);
+ computeKnownBits(Elem, Known, DL);
+ if (Known.Zero.isAllOnesValue())
return true;
}
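The KnownBits migration seen here in Lint recurs through the rest of the patch: the paired KnownZero/KnownOne APInts become one KnownBits value whose Zero and One members carry the same information. A minimal sketch of the new pattern, using only calls that appear in this diff (the helper name is hypothetical):

  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/Value.h"
  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  static bool isKnownAllZero(Value *V, const DataLayout &DL) {
    unsigned BitWidth = V->getType()->getIntegerBitWidth();
    KnownBits Known(BitWidth);          // replaces APInt KnownZero, KnownOne
    computeKnownBits(V, Known, DL);     // fills Known.Zero and Known.One
    return Known.Zero.isAllOnesValue(); // every bit known to be zero
  }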
diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp
index c63677fe5502..da5c79ab6c81 100644
--- a/lib/Analysis/MemorySSAUpdater.cpp
+++ b/lib/Analysis/MemorySSAUpdater.cpp
@@ -29,7 +29,7 @@
#define DEBUG_TYPE "memoryssa"
using namespace llvm;
-namespace llvm {
+
// This is the marker algorithm from "Simple and Efficient Construction of
// Static Single Assignment Form"
// The simple, non-marker algorithm places phi nodes at any join
@@ -211,8 +211,8 @@ void MemorySSAUpdater::insertUse(MemoryUse *MU) {
}
// Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef.
-void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB,
- MemoryAccess *NewDef) {
+static void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB,
+ MemoryAccess *NewDef) {
  // Replace any operand whose incoming block is BB with the new defining
// access.
int i = MP->getBasicBlockIndex(BB);
@@ -415,6 +415,7 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
}
return MA;
}
+
void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
assert(!MSSA->isLiveOnEntryDef(MA) &&
"Trying to remove the live on entry def");
@@ -490,5 +491,3 @@ MemoryUseOrDef *MemorySSAUpdater::createMemoryAccessAfter(
++InsertPt->getIterator());
return NewAccess;
}
-
-} // namespace llvm
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 700c383a9dd4..3ac4bf1276eb 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -89,6 +89,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SaveAndRestore.h"
@@ -4575,10 +4576,10 @@ uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
- APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC,
+ KnownBits Known(BitWidth);
+ computeKnownBits(U->getValue(), Known, getDataLayout(), 0, &AC,
nullptr, &DT);
- return Zeros.countTrailingOnes();
+ return Known.Zero.countTrailingOnes();
}
// SCEVUDivExpr
@@ -4757,11 +4758,12 @@ ScalarEvolution::getRange(const SCEV *S,
const DataLayout &DL = getDataLayout();
if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
// For a SCEVUnknown, ask ValueTracking.
- APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT);
- if (Ones != ~Zeros + 1)
+ KnownBits Known(BitWidth);
+ computeKnownBits(U->getValue(), Known, DL, 0, &AC, nullptr, &DT);
+ if (Known.One != ~Known.Zero + 1)
ConservativeResult =
- ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));
+ ConservativeResult.intersectWith(ConstantRange(Known.One,
+ ~Known.Zero + 1));
} else {
assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&
"generalize as needed!");
@@ -5292,13 +5294,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
unsigned LZ = A.countLeadingZeros();
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(BO->LHS, KnownZero, KnownOne, getDataLayout(),
+ KnownBits Known(BitWidth);
+ computeKnownBits(BO->LHS, Known, getDataLayout(),
0, &AC, nullptr, &DT);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
- if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
+ if ((LZ != 0 || TZ != 0) && !((~A & ~Known.Zero) & EffectiveMask)) {
const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ));
const SCEV *LHS = getSCEV(BO->LHS);
const SCEV *ShiftedLHS = nullptr;
@@ -5328,12 +5330,28 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
case Instruction::Or:
- // Use ValueTracking to check whether this is actually an add.
- if (haveNoCommonBitsSet(BO->LHS, BO->RHS, getDataLayout(), &AC,
- nullptr, &DT)) {
- // There aren't any common bits set, so the add can't wrap.
- auto Flags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNSW);
- return getAddExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags);
+ // If the RHS of the Or is a constant, we may have something like:
+ // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
+ // optimizations will transparently handle this case.
+ //
+ // In order for this transformation to be safe, the LHS must be of the
+ // form X*(2^n) and the Or constant must be less than 2^n.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) {
+ const SCEV *LHS = getSCEV(BO->LHS);
+ const APInt &CIVal = CI->getValue();
+ if (GetMinTrailingZeros(LHS) >=
+ (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
+ // Build a plain add SCEV.
+ const SCEV *S = getAddExpr(LHS, getSCEV(CI));
+ // If the LHS of the add was an addrec and it has no-wrap flags,
+ // transfer the no-wrap flags, since an or won't introduce a wrap.
+ if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
+ const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags(
+ OldAR->getNoWrapFlags());
+ }
+ return S;
+ }
}
break;
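A worked instance of the new Or handling above, under its own precondition (purely illustrative):

    X*4 | 1 : GetMinTrailingZeros(X*4) >= 2, the constant 1 has 1 active bit, 1 <= 2,
              so the or cannot carry and is modeled as the add X*4 + 1 (no-wrap flags
              of an addrec LHS are transferred, as the code above does).
    X*4 | 5 : the constant 5 has 3 active bits, 3 > 2, so the test fails and the
              expression stays an or.

Here "active bits" is the quantity the code computes as CIVal.getBitWidth() - CIVal.countLeadingZeros().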
@@ -6063,24 +6081,74 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
return getCouldNotCompute();
}
-ScalarEvolution::ExitLimit
-ScalarEvolution::computeExitLimitFromCond(const Loop *L,
- Value *ExitCond,
- BasicBlock *TBB,
- BasicBlock *FBB,
- bool ControlsExit,
- bool AllowPredicates) {
+ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond(
+ const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB,
+ bool ControlsExit, bool AllowPredicates) {
+ ScalarEvolution::ExitLimitCacheTy Cache(L, TBB, FBB, AllowPredicates);
+ return computeExitLimitFromCondCached(Cache, L, ExitCond, TBB, FBB,
+ ControlsExit, AllowPredicates);
+}
+
+Optional<ScalarEvolution::ExitLimit>
+ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond,
+ BasicBlock *TBB, BasicBlock *FBB,
+ bool ControlsExit, bool AllowPredicates) {
+ (void)this->L;
+ (void)this->TBB;
+ (void)this->FBB;
+ (void)this->AllowPredicates;
+
+ assert(this->L == L && this->TBB == TBB && this->FBB == FBB &&
+ this->AllowPredicates == AllowPredicates &&
+ "Variance in assumed invariant key components!");
+ auto Itr = TripCountMap.find({ExitCond, ControlsExit});
+ if (Itr == TripCountMap.end())
+ return None;
+ return Itr->second;
+}
+
+void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond,
+ BasicBlock *TBB, BasicBlock *FBB,
+ bool ControlsExit,
+ bool AllowPredicates,
+ const ExitLimit &EL) {
+ assert(this->L == L && this->TBB == TBB && this->FBB == FBB &&
+ this->AllowPredicates == AllowPredicates &&
+ "Variance in assumed invariant key components!");
+
+ auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL});
+ assert(InsertResult.second && "Expected successful insertion!");
+ (void)InsertResult;
+}
+
+ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached(
+ ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB,
+ BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) {
+
+ if (auto MaybeEL =
+ Cache.find(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates))
+ return *MaybeEL;
+
+ ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, TBB, FBB,
+ ControlsExit, AllowPredicates);
+ Cache.insert(L, ExitCond, TBB, FBB, ControlsExit, AllowPredicates, EL);
+ return EL;
+}
+
+ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
+ ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, BasicBlock *TBB,
+ BasicBlock *FBB, bool ControlsExit, bool AllowPredicates) {
// Check if the controlling expression for this loop is an And or Or.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
bool EitherMayExit = L->contains(TBB);
- ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
- ControlsExit && !EitherMayExit,
- AllowPredicates);
- ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
- ControlsExit && !EitherMayExit,
- AllowPredicates);
+ ExitLimit EL0 = computeExitLimitFromCondCached(
+ Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit,
+ AllowPredicates);
+ ExitLimit EL1 = computeExitLimitFromCondCached(
+ Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit,
+ AllowPredicates);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
@@ -6124,12 +6192,12 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
bool EitherMayExit = L->contains(FBB);
- ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
- ControlsExit && !EitherMayExit,
- AllowPredicates);
- ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
- ControlsExit && !EitherMayExit,
- AllowPredicates);
+ ExitLimit EL0 = computeExitLimitFromCondCached(
+ Cache, L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit,
+ AllowPredicates);
+ ExitLimit EL1 = computeExitLimitFromCondCached(
+ Cache, L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit,
+ AllowPredicates);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
if (EitherMayExit) {
@@ -10221,84 +10289,75 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
}
-typedef DenseMap<const Loop *, std::string> VerifyMap;
+void ScalarEvolution::verify() const {
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+ ScalarEvolution SE2(F, TLI, AC, DT, LI);
+
+ SmallVector<Loop *, 8> LoopStack(LI.begin(), LI.end());
-/// replaceSubString - Replaces all occurrences of From in Str with To.
-static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
- size_t Pos = 0;
- while ((Pos = Str.find(From, Pos)) != std::string::npos) {
- Str.replace(Pos, From.size(), To.data(), To.size());
- Pos += To.size();
- }
-}
+ // Maps SCEV expressions from one ScalarEvolution "universe" to another.
+ struct SCEVMapper : public SCEVRewriteVisitor<SCEVMapper> {
+ const SCEV *visitConstant(const SCEVConstant *Constant) {
+ return SE.getConstant(Constant->getAPInt());
+ }
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ return SE.getUnknown(Expr->getValue());
+ }
-/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
-static void
-getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
- std::string &S = Map[L];
- if (S.empty()) {
- raw_string_ostream OS(S);
- SE.getBackedgeTakenCount(L)->print(OS);
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return SE.getCouldNotCompute();
+ }
+ SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {}
+ };
- // false and 0 are semantically equivalent. This can happen in dead loops.
- replaceSubString(OS.str(), "false", "0");
- // Remove wrap flags, their use in SCEV is highly fragile.
- // FIXME: Remove this when SCEV gets smarter about them.
- replaceSubString(OS.str(), "<nw>", "");
- replaceSubString(OS.str(), "<nsw>", "");
- replaceSubString(OS.str(), "<nuw>", "");
- }
+ SCEVMapper SCM(SE2);
- for (auto *R : reverse(*L))
- getLoopBackedgeTakenCounts(R, Map, SE); // recurse.
-}
+ while (!LoopStack.empty()) {
+ auto *L = LoopStack.pop_back_val();
+ LoopStack.insert(LoopStack.end(), L->begin(), L->end());
-void ScalarEvolution::verify() const {
- ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+ auto *CurBECount = SCM.visit(
+ const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L));
+ auto *NewBECount = SE2.getBackedgeTakenCount(L);
- // Gather stringified backedge taken counts for all loops using SCEV's caches.
- // FIXME: It would be much better to store actual values instead of strings,
- // but SCEV pointers will change if we drop the caches.
- VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
- for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
- getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
+ if (CurBECount == SE2.getCouldNotCompute() ||
+ NewBECount == SE2.getCouldNotCompute()) {
+ // NB! This situation is legal, but is very suspicious -- whatever pass
+ // changed the loop to make a trip count go from could not compute to
+ // computable or vice-versa *should have* invalidated SCEV. However, we
+ // choose not to assert here (for now) since we don't want false
+ // positives.
+ continue;
+ }
- // Gather stringified backedge taken counts for all loops using a fresh
- // ScalarEvolution object.
- ScalarEvolution SE2(F, TLI, AC, DT, LI);
- for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
- getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2);
-
- // Now compare whether they're the same with and without caches. This allows
- // verifying that no pass changed the cache.
- assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&
- "New loops suddenly appeared!");
-
- for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
- OldE = BackedgeDumpsOld.end(),
- NewI = BackedgeDumpsNew.begin();
- OldI != OldE; ++OldI, ++NewI) {
- assert(OldI->first == NewI->first && "Loop order changed!");
-
- // Compare the stringified SCEVs. We don't care if undef backedgetaken count
- // changes.
- // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. This
- // means that a pass is buggy or SCEV has to learn a new pattern but is
- // usually not harmful.
- if (OldI->second != NewI->second &&
- OldI->second.find("undef") == std::string::npos &&
- NewI->second.find("undef") == std::string::npos &&
- OldI->second != "***COULDNOTCOMPUTE***" &&
- NewI->second != "***COULDNOTCOMPUTE***") {
- dbgs() << "SCEVValidator: SCEV for loop '"
- << OldI->first->getHeader()->getName()
- << "' changed from '" << OldI->second
- << "' to '" << NewI->second << "'!\n";
+ if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) {
+ // SCEV treats "undef" as an unknown but consistent value (i.e. it does
+ // not propagate undef aggressively). This means we can (and do) fail
+ // verification in cases where a transform makes the trip count of a loop
+ // go from "undef" to "undef+1" (say). The transform is fine, since in
+ // both cases the loop iterates "undef" times, but SCEV thinks we
+ // increased the trip count of the loop by 1 incorrectly.
+ continue;
+ }
+
+ if (SE.getTypeSizeInBits(CurBECount->getType()) >
+ SE.getTypeSizeInBits(NewBECount->getType()))
+ NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType());
+ else if (SE.getTypeSizeInBits(CurBECount->getType()) <
+ SE.getTypeSizeInBits(NewBECount->getType()))
+ CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType());
+
+ auto *ConstantDelta =
+ dyn_cast<SCEVConstant>(SE2.getMinusSCEV(CurBECount, NewBECount));
+
+ if (ConstantDelta && ConstantDelta->getAPInt() != 0) {
+ dbgs() << "Trip Count Changed!\n";
+ dbgs() << "Old: " << *CurBECount << "\n";
+ dbgs() << "New: " << *NewBECount << "\n";
+ dbgs() << "Delta: " << *ConstantDelta << "\n";
std::abort();
}
}
-
- // TODO: Verify more things.
}
bool ScalarEvolution::invalidate(
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 2aaa4c1ae117..54c44c8e542d 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -51,40 +51,47 @@ NormalizeDenormalizeRewriter::visitAddRecExpr(const SCEVAddRecExpr *AR) {
transform(AR->operands(), std::back_inserter(Operands),
[&](const SCEV *Op) { return visit(Op); });
- // Conservatively use AnyWrap until/unless we need FlagNW.
- const SCEV *Result =
- SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap);
- switch (Kind) {
- case Normalize:
- // We want to normalize step expression, because otherwise we might not be
- // able to denormalize to the original expression.
+ if (!Pred(AR))
+ return SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap);
+
+ // Normalization and denormalization are fancy names for decrementing and
+ // incrementing a SCEV expression with respect to a set of loops. Since
+ // Pred(AR) has returned true, we know we need to normalize or denormalize AR
+ // with respect to its loop.
+
+ if (Kind == Denormalize) {
+ // Denormalization / "partial increment" is essentially the same as \c
+ // SCEVAddRecExpr::getPostIncExpr. Here we use an explicit loop to make the
+ // symmetry with Normalization clear.
+ for (int i = 0, e = Operands.size() - 1; i < e; i++)
+ Operands[i] = SE.getAddExpr(Operands[i], Operands[i + 1]);
+ } else {
+ assert(Kind == Normalize && "Only two possibilities!");
+
+ // Normalization / "partial decrement" is a bit more subtle. Since
+ // incrementing a SCEV expression (in general) changes the step of the SCEV
+ // expression as well, we cannot use the step of the current expression.
+ // Instead, we have to use the step of the very expression we're trying to
+ // compute!
+ //
+ // We solve the issue by recursively building up the result, starting from
+ // the "least significant" operand in the add recurrence:
//
- // Here is an example what will happen if we don't normalize step:
- // ORIGINAL ISE:
- // {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25>
- // NORMALIZED ISE:
- // {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+,
- // (100 /u {0,+,1}<%bb16>)}<%bb25>
- // DENORMALIZED BACK ISE:
- // {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+,
- // (100 /u {1,+,1}<%bb16>)}<%bb25>
- // Note that the initial value changes after normalization +
- // denormalization, which isn't correct.
- if (Pred(AR)) {
- const SCEV *TransformedStep = visit(AR->getStepRecurrence(SE));
- Result = SE.getMinusSCEV(Result, TransformedStep);
- }
- break;
- case Denormalize:
- // Here we want to normalize step expressions for the same reasons, as
- // stated above.
- if (Pred(AR)) {
- const SCEV *TransformedStep = visit(AR->getStepRecurrence(SE));
- Result = SE.getAddExpr(Result, TransformedStep);
- }
- break;
+ // Base case:
+ // Single operand add recurrence. It's its own normalization.
+ //
+ // N-operand case:
+ // {S_{N-1},+,S_{N-2},+,...,+,S_0} = S
+ //
+ // Since the step recurrence of S is {S_{N-2},+,...,+,S_0}, we know its
+ // normalization by induction. We subtract the normalized step
+ // recurrence from S_{N-1} to get the normalization of S.
+
+ for (int i = Operands.size() - 2; i >= 0; i--)
+ Operands[i] = SE.getMinusSCEV(Operands[i], Operands[i + 1]);
}
- return Result;
+
+ return SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap);
}
const SCEV *llvm::normalizeForPostIncUse(const SCEV *S,
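To make the induction argument above concrete, here is the rewrite applied to small add recurrences (values only, purely illustrative; SE.getMinusSCEV and SE.getAddExpr do the actual folding):

    normalize   {A,+,B}     ->  {A - B,+,B}
    denormalize {A,+,B}     ->  {A + B,+,B}               (the post-increment form)
    normalize   {A,+,B,+,C} ->  {A - (B - C),+,B - C,+,C}

Denormalizing either normalized result recovers the original expression, which is the round-trip property that the removed step-based code could break (see the example in the deleted comment).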
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 900a2363e60d..af964b6259bb 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <array>
@@ -130,15 +131,15 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
return nullptr;
}
-static void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
+static void computeKnownBits(const Value *V, KnownBits &Known,
unsigned Depth, const Query &Q);
-void llvm::computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
+void llvm::computeKnownBits(const Value *V, KnownBits &Known,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT,
OptimizationRemarkEmitter *ORE) {
- ::computeKnownBits(V, KnownZero, KnownOne, Depth,
+ ::computeKnownBits(V, Known, Depth,
Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
}
@@ -151,11 +152,11 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
assert(LHS->getType()->isIntOrIntVectorTy() &&
"LHS and RHS should be integers");
IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
- APInt LHSKnownZero(IT->getBitWidth(), 0), LHSKnownOne(IT->getBitWidth(), 0);
- APInt RHSKnownZero(IT->getBitWidth(), 0), RHSKnownOne(IT->getBitWidth(), 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, 0, AC, CxtI, DT);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, 0, AC, CxtI, DT);
- return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
+ KnownBits LHSKnown(IT->getBitWidth());
+ KnownBits RHSKnown(IT->getBitWidth());
+ computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT);
+ computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT);
+ return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue();
}
static void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
@@ -252,67 +253,65 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
bool NSW,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2,
+ KnownBits &KnownOut, KnownBits &Known2,
unsigned Depth, const Query &Q) {
- unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned BitWidth = KnownOut.getBitWidth();
// If an initial sequence of bits in the result is not needed, the
// corresponding bits in the operands are not needed.
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, Depth + 1, Q);
- computeKnownBits(Op1, KnownZero2, KnownOne2, Depth + 1, Q);
+ KnownBits LHSKnown(BitWidth);
+ computeKnownBits(Op0, LHSKnown, Depth + 1, Q);
+ computeKnownBits(Op1, Known2, Depth + 1, Q);
// Carry in a 1 for a subtract, rather than a 0.
uint64_t CarryIn = 0;
if (!Add) {
// Sum = LHS + ~RHS + 1
- std::swap(KnownZero2, KnownOne2);
+ std::swap(Known2.Zero, Known2.One);
CarryIn = 1;
}
- APInt PossibleSumZero = ~LHSKnownZero + ~KnownZero2 + CarryIn;
- APInt PossibleSumOne = LHSKnownOne + KnownOne2 + CarryIn;
+ APInt PossibleSumZero = ~LHSKnown.Zero + ~Known2.Zero + CarryIn;
+ APInt PossibleSumOne = LHSKnown.One + Known2.One + CarryIn;
// Compute known bits of the carry.
- APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnownZero ^ KnownZero2);
- APInt CarryKnownOne = PossibleSumOne ^ LHSKnownOne ^ KnownOne2;
+ APInt CarryKnownZero = ~(PossibleSumZero ^ LHSKnown.Zero ^ Known2.Zero);
+ APInt CarryKnownOne = PossibleSumOne ^ LHSKnown.One ^ Known2.One;
// Compute set of known bits (where all three relevant bits are known).
- APInt LHSKnown = LHSKnownZero | LHSKnownOne;
- APInt RHSKnown = KnownZero2 | KnownOne2;
- APInt CarryKnown = CarryKnownZero | CarryKnownOne;
- APInt Known = LHSKnown & RHSKnown & CarryKnown;
+ APInt LHSKnownUnion = LHSKnown.Zero | LHSKnown.One;
+ APInt RHSKnownUnion = Known2.Zero | Known2.One;
+ APInt CarryKnownUnion = CarryKnownZero | CarryKnownOne;
+ APInt Known = LHSKnownUnion & RHSKnownUnion & CarryKnownUnion;
assert((PossibleSumZero & Known) == (PossibleSumOne & Known) &&
"known bits of sum differ");
// Compute known bits of the result.
- KnownZero = ~PossibleSumOne & Known;
- KnownOne = PossibleSumOne & Known;
+ KnownOut.Zero = ~PossibleSumOne & Known;
+ KnownOut.One = PossibleSumOne & Known;
// Are we still trying to solve for the sign bit?
if (!Known.isSignBitSet()) {
if (NSW) {
// Adding two non-negative numbers, or subtracting a negative number from
// a non-negative one, can't wrap into negative.
- if (LHSKnownZero.isSignBitSet() && KnownZero2.isSignBitSet())
- KnownZero.setSignBit();
+ if (LHSKnown.Zero.isSignBitSet() && Known2.Zero.isSignBitSet())
+ KnownOut.Zero.setSignBit();
// Adding two negative numbers, or subtracting a non-negative number from
// a negative one, can't wrap into non-negative.
- else if (LHSKnownOne.isSignBitSet() && KnownOne2.isSignBitSet())
- KnownOne.setSignBit();
+ else if (LHSKnown.One.isSignBitSet() && Known2.One.isSignBitSet())
+ KnownOut.One.setSignBit();
}
}
}
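A short worked example of the carry propagation above, for 4-bit operands (bit masks written most significant bit first, with '?' marking an unknown bit; purely illustrative):

    Op0 known as 01?0 :  LHSKnown.Zero = 1001, LHSKnown.One = 0100
    Op1 known as 0001 :  Known2.Zero   = 1110, Known2.One   = 0001
    PossibleSumZero = ~1001 + ~1110 + 0 = 0110 + 0001 = 0111
    PossibleSumOne  =  0100 +  0001 + 0 = 0101
    Every carry is known (and zero), so Known = 1101, giving
    KnownOut.Zero = 1000 and KnownOut.One = 0101: the sum is ?101, i.e. 5 or 7.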
static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2,
+ KnownBits &Known, KnownBits &Known2,
unsigned Depth, const Query &Q) {
- unsigned BitWidth = KnownZero.getBitWidth();
- computeKnownBits(Op1, KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(Op0, KnownZero2, KnownOne2, Depth + 1, Q);
+ unsigned BitWidth = Known.getBitWidth();
+ computeKnownBits(Op1, Known, Depth + 1, Q);
+ computeKnownBits(Op0, Known2, Depth + 1, Q);
bool isKnownNegative = false;
bool isKnownNonNegative = false;
@@ -322,10 +321,10 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
// The product of a number with itself is non-negative.
isKnownNonNegative = true;
} else {
- bool isKnownNonNegativeOp1 = KnownZero.isSignBitSet();
- bool isKnownNonNegativeOp0 = KnownZero2.isSignBitSet();
- bool isKnownNegativeOp1 = KnownOne.isSignBitSet();
- bool isKnownNegativeOp0 = KnownOne2.isSignBitSet();
+ bool isKnownNonNegativeOp1 = Known.Zero.isSignBitSet();
+ bool isKnownNonNegativeOp0 = Known2.Zero.isSignBitSet();
+ bool isKnownNegativeOp1 = Known.One.isSignBitSet();
+ bool isKnownNegativeOp0 = Known2.One.isSignBitSet();
// The product of two numbers with the same sign is non-negative.
isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
(isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
@@ -343,28 +342,28 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
// Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
- KnownOne.clearAllBits();
- unsigned TrailZ = KnownZero.countTrailingOnes() +
- KnownZero2.countTrailingOnes();
- unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
- KnownZero2.countLeadingOnes(),
+ Known.One.clearAllBits();
+ unsigned TrailZ = Known.Zero.countTrailingOnes() +
+ Known2.Zero.countTrailingOnes();
+ unsigned LeadZ = std::max(Known.Zero.countLeadingOnes() +
+ Known2.Zero.countLeadingOnes(),
BitWidth) - BitWidth;
TrailZ = std::min(TrailZ, BitWidth);
LeadZ = std::min(LeadZ, BitWidth);
- KnownZero.clearAllBits();
- KnownZero.setLowBits(TrailZ);
- KnownZero.setHighBits(LeadZ);
+ Known.Zero.clearAllBits();
+ Known.Zero.setLowBits(TrailZ);
+ Known.Zero.setHighBits(LeadZ);
// Only make use of no-wrap flags if we failed to compute the sign bit
// directly. This matters if the multiplication always overflows, in
// which case we prefer to follow the result of the direct computation,
// though as the program is invoking undefined behaviour we can choose
// whatever we like here.
- if (isKnownNonNegative && !KnownOne.isSignBitSet())
- KnownZero.setSignBit();
- else if (isKnownNegative && !KnownZero.isSignBitSet())
- KnownOne.setSignBit();
+ if (isKnownNonNegative && !Known.One.isSignBitSet())
+ Known.Zero.setSignBit();
+ else if (isKnownNegative && !Known.Zero.isSignBitSet())
+ Known.One.setSignBit();
}
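As a quick sanity check of the conservative estimates in computeKnownBitsMul above (purely illustrative numbers): with BitWidth = 8, an Op0 with 2 trailing known-zero bits and an Op1 with 3 give TrailZ = 5, since a multiple of 4 times a multiple of 8 is a multiple of 32; and if Op0 has 4 leading known zeros (Op0 < 16) while Op1 has 5 (Op1 < 8), then LeadZ = max(4 + 5, 8) - 8 = 1, i.e. the product is known to stay below 2^7.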
void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
@@ -499,15 +498,14 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
return !isEphemeralValueOf(Inv, CxtI);
}
-static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth,
- const Query &Q) {
+static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
+ unsigned Depth, const Query &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
// cannot use them!
if (!Q.AC || !Q.CxtI)
return;
- unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned BitWidth = Known.getBitWidth();
// Note that the patterns below need to be kept in sync with the code
// in AssumptionCache::updateAffectedValues.
@@ -532,15 +530,15 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
assert(BitWidth == 1 && "assume operand is not i1?");
- KnownZero.clearAllBits();
- KnownOne.setAllBits();
+ Known.Zero.clearAllBits();
+ Known.One.setAllBits();
return;
}
if (match(Arg, m_Not(m_Specific(V))) &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
assert(BitWidth == 1 && "assume operand is not i1?");
- KnownZero.setAllBits();
- KnownOne.clearAllBits();
+ Known.Zero.setAllBits();
+ Known.One.clearAllBits();
return;
}
@@ -558,126 +556,126 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
// assume(v = a)
if (match(Arg, m_c_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- KnownZero |= RHSKnownZero;
- KnownOne |= RHSKnownOne;
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ Known.Zero |= RHSKnown.Zero;
+ Known.One |= RHSKnown.One;
// assume(v & b = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
- computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits MaskKnown(BitWidth);
+ computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
// For those bits in the mask that are known to be one, we can propagate
// known bits from the RHS to V.
- KnownZero |= RHSKnownZero & MaskKnownOne;
- KnownOne |= RHSKnownOne & MaskKnownOne;
+ Known.Zero |= RHSKnown.Zero & MaskKnown.One;
+ Known.One |= RHSKnown.One & MaskKnown.One;
// assume(~(v & b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- APInt MaskKnownZero(BitWidth, 0), MaskKnownOne(BitWidth, 0);
- computeKnownBits(B, MaskKnownZero, MaskKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits MaskKnown(BitWidth);
+ computeKnownBits(B, MaskKnown, Depth+1, Query(Q, I));
// For those bits in the mask that are known to be one, we can propagate
// inverted known bits from the RHS to V.
- KnownZero |= RHSKnownOne & MaskKnownOne;
- KnownOne |= RHSKnownZero & MaskKnownOne;
+ Known.Zero |= RHSKnown.One & MaskKnown.One;
+ Known.One |= RHSKnown.Zero & MaskKnown.One;
// assume(v | b = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
- computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
// For those bits in B that are known to be zero, we can propagate known
// bits from the RHS to V.
- KnownZero |= RHSKnownZero & BKnownZero;
- KnownOne |= RHSKnownOne & BKnownZero;
+ Known.Zero |= RHSKnown.Zero & BKnown.Zero;
+ Known.One |= RHSKnown.One & BKnown.Zero;
// assume(~(v | b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
- computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
// For those bits in B that are known to be zero, we can propagate
// inverted known bits from the RHS to V.
- KnownZero |= RHSKnownOne & BKnownZero;
- KnownOne |= RHSKnownZero & BKnownZero;
+ Known.Zero |= RHSKnown.One & BKnown.Zero;
+ Known.One |= RHSKnown.Zero & BKnown.Zero;
// assume(v ^ b = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
- computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
// For those bits in B that are known to be zero, we can propagate known
// bits from the RHS to V. For those bits in B that are known to be one,
// we can propagate inverted known bits from the RHS to V.
- KnownZero |= RHSKnownZero & BKnownZero;
- KnownOne |= RHSKnownOne & BKnownZero;
- KnownZero |= RHSKnownOne & BKnownOne;
- KnownOne |= RHSKnownZero & BKnownOne;
+ Known.Zero |= RHSKnown.Zero & BKnown.Zero;
+ Known.One |= RHSKnown.One & BKnown.Zero;
+ Known.Zero |= RHSKnown.One & BKnown.One;
+ Known.One |= RHSKnown.Zero & BKnown.One;
// assume(~(v ^ b) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
- APInt BKnownZero(BitWidth, 0), BKnownOne(BitWidth, 0);
- computeKnownBits(B, BKnownZero, BKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
+ KnownBits BKnown(BitWidth);
+ computeKnownBits(B, BKnown, Depth+1, Query(Q, I));
// For those bits in B that are known to be zero, we can propagate
// inverted known bits from the RHS to V. For those bits in B that are
// known to be one, we can propagate known bits from the RHS to V.
- KnownZero |= RHSKnownOne & BKnownZero;
- KnownOne |= RHSKnownZero & BKnownZero;
- KnownZero |= RHSKnownZero & BKnownOne;
- KnownOne |= RHSKnownOne & BKnownOne;
+ Known.Zero |= RHSKnown.One & BKnown.Zero;
+ Known.One |= RHSKnown.Zero & BKnown.Zero;
+ Known.Zero |= RHSKnown.Zero & BKnown.One;
+ Known.One |= RHSKnown.One & BKnown.One;
// assume(v << c = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
// bits in V shifted to the right by C.
- RHSKnownZero.lshrInPlace(C->getZExtValue());
- KnownZero |= RHSKnownZero;
- RHSKnownOne.lshrInPlace(C->getZExtValue());
- KnownOne |= RHSKnownOne;
+ RHSKnown.Zero.lshrInPlace(C->getZExtValue());
+ Known.Zero |= RHSKnown.Zero;
+ RHSKnown.One.lshrInPlace(C->getZExtValue());
+ Known.One |= RHSKnown.One;
// assume(~(v << c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
// to known bits in V shifted to the right by C.
- RHSKnownOne.lshrInPlace(C->getZExtValue());
- KnownZero |= RHSKnownOne;
- RHSKnownZero.lshrInPlace(C->getZExtValue());
- KnownOne |= RHSKnownZero;
+ RHSKnown.One.lshrInPlace(C->getZExtValue());
+ Known.Zero |= RHSKnown.One;
+ RHSKnown.Zero.lshrInPlace(C->getZExtValue());
+ Known.One |= RHSKnown.Zero;
// assume(v >> c = a)
} else if (match(Arg,
m_c_ICmp(Pred, m_CombineOr(m_LShr(m_V, m_ConstantInt(C)),
@@ -685,12 +683,12 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them to known
// bits in V shifted to the right by C.
- KnownZero |= RHSKnownZero << C->getZExtValue();
- KnownOne |= RHSKnownOne << C->getZExtValue();
+ Known.Zero |= RHSKnown.Zero << C->getZExtValue();
+ Known.One |= RHSKnown.One << C->getZExtValue();
// assume(~(v >> c) = a)
} else if (match(Arg, m_c_ICmp(Pred, m_Not(m_CombineOr(
m_LShr(m_V, m_ConstantInt(C)),
@@ -698,78 +696,78 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
m_Value(A))) &&
Pred == ICmpInst::ICMP_EQ &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// For those bits in RHS that are known, we can propagate them inverted
// to known bits in V shifted to the right by C.
- KnownZero |= RHSKnownOne << C->getZExtValue();
- KnownOne |= RHSKnownZero << C->getZExtValue();
+ Known.Zero |= RHSKnown.One << C->getZExtValue();
+ Known.One |= RHSKnown.Zero << C->getZExtValue();
// assume(v >=_s c) where c is non-negative
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SGE &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- if (RHSKnownZero.isSignBitSet()) {
+ if (RHSKnown.Zero.isSignBitSet()) {
// We know that the sign bit is zero.
- KnownZero.setSignBit();
+ Known.Zero.setSignBit();
}
// assume(v >_s c) where c is at least -1.
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SGT &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isSignBitSet()) {
+ if (RHSKnown.One.isAllOnesValue() || RHSKnown.Zero.isSignBitSet()) {
// We know that the sign bit is zero.
- KnownZero.setSignBit();
+ Known.Zero.setSignBit();
}
// assume(v <=_s c) where c is negative
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SLE &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- if (RHSKnownOne.isSignBitSet()) {
+ if (RHSKnown.One.isSignBitSet()) {
// We know that the sign bit is one.
- KnownOne.setSignBit();
+ Known.One.setSignBit();
}
// assume(v <_s c) where c is non-positive
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_SLT &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
- if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isSignBitSet()) {
+ if (RHSKnown.Zero.isAllOnesValue() || RHSKnown.One.isSignBitSet()) {
// We know that the sign bit is one.
- KnownOne.setSignBit();
+ Known.One.setSignBit();
}
// assume(v <=_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_ULE &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// Whatever high bits in c are zero are known to be zero.
- KnownZero.setHighBits(RHSKnownZero.countLeadingOnes());
+ Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes());
// assume(v <_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_ULT &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
- computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(A, RHSKnown, Depth+1, Query(Q, I));
// Whatever high bits in c are zero are known to be zero (if c is a power
// of 2, then one more).
if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
- KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()+1);
+ Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes()+1);
else
- KnownZero.setHighBits(RHSKnownZero.countLeadingOnes());
+ Known.Zero.setHighBits(RHSKnown.Zero.countLeadingOnes());
}
}
@@ -778,9 +776,9 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
// so this isn't a real bug. On the other hand, the program may have undefined
// behavior, or we might have a bug in the compiler. We can't assert/crash, so
// clear out the known bits, try to warn the user, and hope for the best.
- if ((KnownZero & KnownOne) != 0) {
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
+ if (Known.Zero.intersects(Known.One)) {
+ Known.Zero.clearAllBits();
+ Known.One.clearAllBits();
if (Q.ORE) {
auto *CxtI = const_cast<Instruction *>(Q.CxtI);
@@ -793,57 +791,57 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
}
// Compute known bits from a shift operator, including those with a
-// non-constant shift amount. KnownZero and KnownOne are the outputs of this
-// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the
-// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific
-// functors that, given the known-zero or known-one bits respectively, and a
-// shift amount, compute the implied known-zero or known-one bits of the shift
-// operator's result respectively for that shift amount. The results from calling
-// KZF and KOF are conservatively combined for all permitted shift amounts.
+// non-constant shift amount. Known is the output of this function. Known2 is a
+// pre-allocated temporary with the same bit width as Known. KZF and KOF are
+// operator-specific functors that, given the known-zero or known-one bits
+// respectively, and a shift amount, compute the implied known-zero or known-one
+// bits of the shift operator's result respectively for that shift amount. The
+// results from calling KZF and KOF are conservatively combined for all
+// permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
- const Operator *I, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2,
- APInt &KnownOne2, unsigned Depth, const Query &Q,
+ const Operator *I, KnownBits &Known, KnownBits &Known2,
+ unsigned Depth, const Query &Q,
function_ref<APInt(const APInt &, unsigned)> KZF,
function_ref<APInt(const APInt &, unsigned)> KOF) {
- unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned BitWidth = Known.getBitWidth();
if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
- KnownZero = KZF(KnownZero, ShiftAmt);
- KnownOne = KOF(KnownOne, ShiftAmt);
- // If there is conflict between KnownZero and KnownOne, this must be an
- // overflowing left shift, so the shift result is undefined. Clear KnownZero
- // and KnownOne bits so that other code could propagate this undef.
- if ((KnownZero & KnownOne) != 0) {
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ Known.Zero = KZF(Known.Zero, ShiftAmt);
+ Known.One = KOF(Known.One, ShiftAmt);
+ // If there is conflict between Known.Zero and Known.One, this must be an
+ // overflowing left shift, so the shift result is undefined. Clear Known
+ // bits so that other code could propagate this undef.
+ if ((Known.Zero & Known.One) != 0) {
+ Known.Zero.clearAllBits();
+ Known.One.clearAllBits();
}
return;
}
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
// If the shift amount could be greater than or equal to the bit-width of the LHS, the
// value could be undef, so we don't know anything about it.
- if ((~KnownZero).uge(BitWidth)) {
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
+ if ((~Known.Zero).uge(BitWidth)) {
+ Known.Zero.clearAllBits();
+ Known.One.clearAllBits();
return;
}
- // Note: We cannot use KnownZero.getLimitedValue() here, because if
+ // Note: We cannot use Known.Zero.getLimitedValue() here, because if
// BitWidth > 64 and any upper bits are known, we'll end up returning the
// limit value (which implies all bits are known).
- uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue();
- uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue();
+ uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
+ uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();
// It would be more-clearly correct to use the two temporaries for this
// calculation. Reusing the APInts here to prevent unnecessary allocations.
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
+ Known.Zero.clearAllBits();
+ Known.One.clearAllBits();
// If we know the shifter operand is nonzero, we can sometimes infer more
// known bits. However this is expensive to compute, so be lazy about it and
@@ -858,9 +856,10 @@ static void computeKnownBitsFromShiftOperator(
return;
}
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
- KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
// Combine the shifted known input bits only for those shift amounts
// compatible with its known constraints.
@@ -879,8 +878,8 @@ static void computeKnownBitsFromShiftOperator(
continue;
}
- KnownZero &= KZF(KnownZero2, ShiftAmt);
- KnownOne &= KOF(KnownOne2, ShiftAmt);
+ Known.Zero &= KZF(Known2.Zero, ShiftAmt);
+ Known.One &= KOF(Known2.One, ShiftAmt);
}
// If there are no compatible shift amounts, then we've proven that the shift
@@ -888,33 +887,32 @@ static void computeKnownBitsFromShiftOperator(
// return anything we'd like, but we need to make sure the sets of known bits
// stay disjoint (it should be better for some other code to actually
// propagate the undef than to pick a value here using known bits).
- if ((KnownZero & KnownOne) != 0) {
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
+ if (Known.Zero.intersects(Known.One)) {
+ Known.Zero.clearAllBits();
+ Known.One.clearAllBits();
}
}
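For readers following the variable-shift case above: the loop enumerates every shift amount compatible with the known bits of the shift operand and intersects the per-amount results. Below is a standalone sketch of that idea using plain uint64_t masks at a fixed 8-bit width instead of APInt/KnownBits; the struct and function names are illustrative assumptions, not LLVM API.

#include <cstdint>
#include <cstdio>

// Simplified stand-in for KnownBits: masks of bits known to be 0 or 1.
struct KnownWord {
  uint64_t Zero = 0; // bits known to be 0
  uint64_t One = 0;  // bits known to be 1
};

// Conservatively combine "Val << Amt" over every shift amount that is
// consistent with the known bits of the (non-constant) shift operand.
static KnownWord combineShl(KnownWord Val, KnownWord Amt, unsigned BitWidth) {
  const uint64_t WidthMask = (BitWidth >= 64) ? ~0ULL : ((1ULL << BitWidth) - 1);
  KnownWord Out;
  Out.Zero = Out.One = WidthMask; // start fully known, intersect away
  bool AnyCompatible = false;
  for (uint64_t S = 0; S < BitWidth; ++S) {
    if ((S & Amt.Zero) != 0)                  continue; // S sets a known-zero bit
    if ((~S & Amt.One & (BitWidth - 1)) != 0) continue; // S clears a known-one bit
    AnyCompatible = true;
    // shl by S: the low S bits become known zero, other known bits move up.
    uint64_t Z = ((Val.Zero << S) | ((1ULL << S) - 1)) & WidthMask;
    uint64_t O = (Val.One << S) & WidthMask;
    Out.Zero &= Z;
    Out.One &= O;
  }
  // No compatible amount, or conflicting bits: report "unknown", mirroring
  // the clearAllBits() calls in the patch.
  if (!AnyCompatible || (Out.Zero & Out.One))
    Out.Zero = Out.One = 0;
  return Out;
}

int main() {
  KnownWord Val;
  Val.Zero = 0x0F; // low 4 bits known zero
  Val.One = 0x10;  // bit 4 known one
  KnownWord Amt;
  Amt.Zero = 0xFE; // shift amount is known to be 0 or 1
  KnownWord R = combineShl(Val, Amt, 8);
  std::printf("Zero=0x%02llx One=0x%02llx\n", // prints Zero=0x0f One=0x00
              (unsigned long long)R.Zero, (unsigned long long)R.One);
  return 0;
}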
-static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth,
- const Query &Q) {
- unsigned BitWidth = KnownZero.getBitWidth();
+static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known,
+ unsigned Depth, const Query &Q) {
+ unsigned BitWidth = Known.getBitWidth();
- APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
+ KnownBits Known2(Known);
switch (I->getOpcode()) {
default: break;
case Instruction::Load:
if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
- computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
+ computeKnownBitsFromRangeMetadata(*MD, Known.Zero, Known.One);
break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// Output known-1 bits are only known if set in both the LHS & RHS.
- KnownOne &= KnownOne2;
+ Known.One &= Known2.One;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
- KnownZero |= KnownZero2;
+ Known.Zero |= Known2.Zero;
// and(x, add (x, -1)) is a common idiom that always clears the low bit;
// here we handle the more general case of adding any odd number by
@@ -922,115 +920,115 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// TODO: This could be generalized to clearing any bit set in y where the
// following bit is known to be unset in y.
Value *Y = nullptr;
- if (!KnownZero[0] && !KnownOne[0] &&
+ if (!Known.Zero[0] && !Known.One[0] &&
(match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)),
m_Value(Y))) ||
match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)),
m_Value(Y))))) {
- KnownZero2.clearAllBits(); KnownOne2.clearAllBits();
- computeKnownBits(Y, KnownZero2, KnownOne2, Depth + 1, Q);
- if (KnownOne2.countTrailingOnes() > 0)
- KnownZero.setBit(0);
+ Known2.Zero.clearAllBits(); Known2.One.clearAllBits();
+ computeKnownBits(Y, Known2, Depth + 1, Q);
+ if (Known2.One.countTrailingOnes() > 0)
+ Known.Zero.setBit(0);
}
break;
}
case Instruction::Or: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// Output known-0 bits are only known if clear in both the LHS & RHS.
- KnownZero &= KnownZero2;
+ Known.Zero &= Known2.Zero;
// Output known-1 are known to be set if set in either the LHS | RHS.
- KnownOne |= KnownOne2;
+ Known.One |= Known2.One;
break;
}
case Instruction::Xor: {
- computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+ APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
- KnownZero = std::move(KnownZeroOut);
+ Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
+ Known.Zero = std::move(KnownZeroOut);
break;
}
case Instruction::Mul: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
- computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero,
- KnownOne, KnownZero2, KnownOne2, Depth, Q);
+ computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, Known,
+ Known2, Depth, Q);
break;
}
case Instruction::UDiv: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
- unsigned LeadZ = KnownZero2.countLeadingOnes();
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ unsigned LeadZ = Known2.Zero.countLeadingOnes();
- KnownOne2.clearAllBits();
- KnownZero2.clearAllBits();
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
- unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+ Known2.One.clearAllBits();
+ Known2.Zero.clearAllBits();
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ unsigned RHSUnknownLeadingOnes = Known2.One.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
- KnownZero.setHighBits(LeadZ);
+ Known.Zero.setHighBits(LeadZ);
break;
}
case Instruction::Select: {
const Value *LHS, *RHS;
SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
if (SelectPatternResult::isMinOrMax(SPF)) {
- computeKnownBits(RHS, KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(LHS, KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(RHS, Known, Depth + 1, Q);
+ computeKnownBits(LHS, Known2, Depth + 1, Q);
} else {
- computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
}
unsigned MaxHighOnes = 0;
unsigned MaxHighZeros = 0;
if (SPF == SPF_SMAX) {
// If both sides are negative, the result is negative.
- if (KnownOne.isSignBitSet() && KnownOne2.isSignBitSet())
+ if (Known.One.isSignBitSet() && Known2.One.isSignBitSet())
// We can derive a lower bound on the result by taking the max of the
// leading one bits.
- MaxHighOnes =
- std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
+ MaxHighOnes = std::max(Known.One.countLeadingOnes(),
+ Known2.One.countLeadingOnes());
// If either side is non-negative, the result is non-negative.
- else if (KnownZero.isSignBitSet() || KnownZero2.isSignBitSet())
+ else if (Known.Zero.isSignBitSet() || Known2.Zero.isSignBitSet())
MaxHighZeros = 1;
} else if (SPF == SPF_SMIN) {
// If both sides are non-negative, the result is non-negative.
- if (KnownZero.isSignBitSet() && KnownZero2.isSignBitSet())
+ if (Known.Zero.isSignBitSet() && Known2.Zero.isSignBitSet())
// We can derive an upper bound on the result by taking the max of the
// leading zero bits.
- MaxHighZeros = std::max(KnownZero.countLeadingOnes(),
- KnownZero2.countLeadingOnes());
+ MaxHighZeros = std::max(Known.Zero.countLeadingOnes(),
+ Known2.Zero.countLeadingOnes());
// If either side is negative, the result is negative.
- else if (KnownOne.isSignBitSet() || KnownOne2.isSignBitSet())
+ else if (Known.One.isSignBitSet() || Known2.One.isSignBitSet())
MaxHighOnes = 1;
} else if (SPF == SPF_UMAX) {
// We can derive a lower bound on the result by taking the max of the
// leading one bits.
MaxHighOnes =
- std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
+ std::max(Known.One.countLeadingOnes(), Known2.One.countLeadingOnes());
} else if (SPF == SPF_UMIN) {
// We can derive an upper bound on the result by taking the max of the
// leading zero bits.
MaxHighZeros =
- std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes());
+ std::max(Known.Zero.countLeadingOnes(), Known2.Zero.countLeadingOnes());
}
// Only known if known in both the LHS and RHS.
- KnownOne &= KnownOne2;
- KnownZero &= KnownZero2;
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
if (MaxHighOnes > 0)
- KnownOne.setHighBits(MaxHighOnes);
+ Known.One.setHighBits(MaxHighOnes);
if (MaxHighZeros > 0)
- KnownZero.setHighBits(MaxHighZeros);
+ Known.Zero.setHighBits(MaxHighZeros);
break;
}
case Instruction::FPTrunc:
@@ -1054,14 +1052,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
SrcBitWidth = Q.DL.getTypeSizeInBits(SrcTy->getScalarType());
assert(SrcBitWidth && "SrcBitWidth can't be zero");
- KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
- KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
- KnownZero = KnownZero.zextOrTrunc(BitWidth);
- KnownOne = KnownOne.zextOrTrunc(BitWidth);
+ Known.Zero = Known.Zero.zextOrTrunc(SrcBitWidth);
+ Known.One = Known.One.zextOrTrunc(SrcBitWidth);
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ Known.Zero = Known.Zero.zextOrTrunc(BitWidth);
+ Known.One = Known.One.zextOrTrunc(BitWidth);
// Any top bits are known to be zero.
if (BitWidth > SrcBitWidth)
- KnownZero.setBitsFrom(SrcBitWidth);
+ Known.Zero.setBitsFrom(SrcBitWidth);
break;
}
case Instruction::BitCast: {
@@ -1070,7 +1068,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
break;
}
break;
@@ -1079,13 +1077,13 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// Compute the bits in the result that are not present in the input.
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
- KnownZero = KnownZero.trunc(SrcBitWidth);
- KnownOne = KnownOne.trunc(SrcBitWidth);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
+ Known.Zero = Known.Zero.trunc(SrcBitWidth);
+ Known.One = Known.One.trunc(SrcBitWidth);
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
- KnownZero = KnownZero.sext(BitWidth);
- KnownOne = KnownOne.sext(BitWidth);
+ Known.Zero = Known.Zero.sext(BitWidth);
+ Known.One = Known.One.sext(BitWidth);
break;
}
case Instruction::Shl: {
@@ -1108,9 +1106,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
return KOResult;
};
- computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
- KnownZero2, KnownOne2, Depth, Q, KZF,
- KOF);
+ computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
break;
}
case Instruction::LShr: {
@@ -1126,9 +1122,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
return KnownOne.lshr(ShiftAmt);
};
- computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
- KnownZero2, KnownOne2, Depth, Q, KZF,
- KOF);
+ computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
break;
}
case Instruction::AShr: {
@@ -1141,23 +1135,19 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
return KnownOne.ashr(ShiftAmt);
};
- computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
- KnownZero2, KnownOne2, Depth, Q, KZF,
- KOF);
+ computeKnownBitsFromShiftOperator(I, Known, Known2, Depth, Q, KZF, KOF);
break;
}
case Instruction::Sub: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, Depth,
- Q);
+ Known, Known2, Depth, Q);
break;
}
case Instruction::Add: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
- KnownZero, KnownOne, KnownZero2, KnownOne2, Depth,
- Q);
+ Known, Known2, Depth, Q);
break;
}
case Instruction::SRem:
@@ -1165,34 +1155,33 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1,
- Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// The low bits of the first operand are unchanged by the srem.
- KnownZero = KnownZero2 & LowBits;
- KnownOne = KnownOne2 & LowBits;
+ Known.Zero = Known2.Zero & LowBits;
+ Known.One = Known2.One & LowBits;
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
- if (KnownZero2.isSignBitSet() || ((KnownZero2 & LowBits) == LowBits))
- KnownZero |= ~LowBits;
+ if (Known2.Zero.isSignBitSet() || ((Known2.Zero & LowBits) == LowBits))
+ Known.Zero |= ~LowBits;
// If the first operand is negative and not all low bits are zero, then
// the upper bits are all one.
- if (KnownOne2.isSignBitSet() && ((KnownOne2 & LowBits) != 0))
- KnownOne |= ~LowBits;
+ if (Known2.One.isSignBitSet() && ((Known2.One & LowBits) != 0))
+ Known.One |= ~LowBits;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
break;
}
}
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero.
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// If it's known zero, our sign bit is also zero.
- if (KnownZero2.isSignBitSet())
- KnownZero.setSignBit();
+ if (Known2.Zero.isSignBitSet())
+ Known.Zero.setSignBit();
break;
case Instruction::URem: {
@@ -1200,23 +1189,23 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
const APInt &RA = Rem->getValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
- KnownZero |= ~LowBits;
- KnownOne &= LowBits;
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ Known.Zero |= ~LowBits;
+ Known.One &= LowBits;
break;
}
}
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
-
- unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
- KnownZero2.countLeadingOnes());
- KnownOne.clearAllBits();
- KnownZero.clearAllBits();
- KnownZero.setHighBits(Leaders);
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+
+ unsigned Leaders = std::max(Known.Zero.countLeadingOnes(),
+ Known2.Zero.countLeadingOnes());
+ Known.One.clearAllBits();
+ Known.Zero.clearAllBits();
+ Known.Zero.setHighBits(Leaders);
break;
}
@@ -1227,16 +1216,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
Align = Q.DL.getABITypeAlignment(AI->getAllocatedType());
if (Align > 0)
- KnownZero.setLowBits(countTrailingZeros(Align));
+ Known.Zero.setLowBits(countTrailingZeros(Align));
break;
}
case Instruction::GetElementPtr: {
// Analyze all of the subscripts of this getelementptr instruction
// to determine if we can prove known low zero bits.
- APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
- computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, Depth + 1,
- Q);
- unsigned TrailZ = LocalKnownZero.countTrailingOnes();
+ KnownBits LocalKnown(BitWidth);
+ computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
+ unsigned TrailZ = LocalKnown.Zero.countTrailingOnes();
gep_type_iterator GTI = gep_type_begin(I);
for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
@@ -1266,15 +1254,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
}
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
uint64_t TypeSize = Q.DL.getTypeAllocSize(IndexedTy);
- LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
- computeKnownBits(Index, LocalKnownZero, LocalKnownOne, Depth + 1, Q);
+ LocalKnown.Zero = LocalKnown.One = APInt(GEPOpiBits, 0);
+ computeKnownBits(Index, LocalKnown, Depth + 1, Q);
TrailZ = std::min(TrailZ,
unsigned(countTrailingZeros(TypeSize) +
- LocalKnownZero.countTrailingOnes()));
+ LocalKnown.Zero.countTrailingOnes()));
}
}
- KnownZero.setLowBits(TrailZ);
+ Known.Zero.setLowBits(TrailZ);
break;
}
case Instruction::PHI: {
@@ -1309,14 +1297,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
break;
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
- computeKnownBits(R, KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(R, Known2, Depth + 1, Q);
// We need to take the minimum number of known bits
- APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
- computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q);
+ KnownBits Known3(Known);
+ computeKnownBits(L, Known3, Depth + 1, Q);
- KnownZero.setLowBits(std::min(KnownZero2.countTrailingOnes(),
- KnownZero3.countTrailingOnes()));
+ Known.Zero.setLowBits(std::min(Known2.Zero.countTrailingOnes(),
+ Known3.Zero.countTrailingOnes()));
if (DontImproveNonNegativePhiBits)
break;
@@ -1333,25 +1321,25 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// (add non-negative, non-negative) --> non-negative
// (add negative, negative) --> negative
if (Opcode == Instruction::Add) {
- if (KnownZero2.isSignBitSet() && KnownZero3.isSignBitSet())
- KnownZero.setSignBit();
- else if (KnownOne2.isSignBitSet() && KnownOne3.isSignBitSet())
- KnownOne.setSignBit();
+ if (Known2.Zero.isSignBitSet() && Known3.Zero.isSignBitSet())
+ Known.Zero.setSignBit();
+ else if (Known2.One.isSignBitSet() && Known3.One.isSignBitSet())
+ Known.One.setSignBit();
}
// (sub nsw non-negative, negative) --> non-negative
// (sub nsw negative, non-negative) --> negative
else if (Opcode == Instruction::Sub && LL == I) {
- if (KnownZero2.isSignBitSet() && KnownOne3.isSignBitSet())
- KnownZero.setSignBit();
- else if (KnownOne2.isSignBitSet() && KnownZero3.isSignBitSet())
- KnownOne.setSignBit();
+ if (Known2.Zero.isSignBitSet() && Known3.One.isSignBitSet())
+ Known.Zero.setSignBit();
+ else if (Known2.One.isSignBitSet() && Known3.Zero.isSignBitSet())
+ Known.One.setSignBit();
}
// (mul nsw non-negative, non-negative) --> non-negative
- else if (Opcode == Instruction::Mul && KnownZero2.isSignBitSet() &&
- KnownZero3.isSignBitSet())
- KnownZero.setSignBit();
+ else if (Opcode == Instruction::Mul && Known2.Zero.isSignBitSet() &&
+ Known3.Zero.isSignBitSet())
+ Known.Zero.setSignBit();
}
break;
@@ -1365,27 +1353,26 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
- if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) {
+ if (Depth < MaxDepth - 1 && !Known.Zero && !Known.One) {
// Skip if every incoming value references to ourself.
if (dyn_cast_or_null<UndefValue>(P->hasConstantValue()))
break;
- KnownZero.setAllBits();
- KnownOne.setAllBits();
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
for (Value *IncValue : P->incoming_values()) {
// Skip direct self references.
if (IncValue == P) continue;
- KnownZero2 = APInt(BitWidth, 0);
- KnownOne2 = APInt(BitWidth, 0);
+ Known2 = KnownBits(BitWidth);
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
- computeKnownBits(IncValue, KnownZero2, KnownOne2, MaxDepth - 1, Q);
- KnownZero &= KnownZero2;
- KnownOne &= KnownOne2;
+ computeKnownBits(IncValue, Known2, MaxDepth - 1, Q);
+ Known.Zero &= Known2.Zero;
+ Known.One &= Known2.One;
// If all bits have been ruled out, there's no need to check
// more operands.
- if (!KnownZero && !KnownOne)
+ if (!Known.Zero && !Known.One)
break;
}
}
@@ -1397,24 +1384,24 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// and then intersect with known bits based on other properties of the
// function.
if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
- computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
+ computeKnownBitsFromRangeMetadata(*MD, Known.Zero, Known.One);
if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
- computeKnownBits(RV, KnownZero2, KnownOne2, Depth + 1, Q);
- KnownZero |= KnownZero2;
- KnownOne |= KnownOne2;
+ computeKnownBits(RV, Known2, Depth + 1, Q);
+ Known.Zero |= Known2.Zero;
+ Known.One |= Known2.One;
}
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::bitreverse:
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
- KnownZero |= KnownZero2.reverseBits();
- KnownOne |= KnownOne2.reverseBits();
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ Known.Zero |= Known2.Zero.reverseBits();
+ Known.One |= Known2.One.reverseBits();
break;
case Intrinsic::bswap:
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
- KnownZero |= KnownZero2.byteSwap();
- KnownOne |= KnownOne2.byteSwap();
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
+ Known.Zero |= Known2.Zero.byteSwap();
+ Known.One |= Known2.One.byteSwap();
break;
case Intrinsic::ctlz:
case Intrinsic::cttz: {
@@ -1422,22 +1409,22 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// If this call is undefined for 0, the result will be less than 2^n.
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
LowBits -= 1;
- KnownZero.setBitsFrom(LowBits);
+ Known.Zero.setBitsFrom(LowBits);
break;
}
case Intrinsic::ctpop: {
- computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
// We can bound the space the count needs. Also, bits known to be zero
// can't contribute to the population.
- unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation();
+ unsigned BitsPossiblySet = BitWidth - Known2.Zero.countPopulation();
unsigned LowBits = Log2_32(BitsPossiblySet)+1;
- KnownZero.setBitsFrom(LowBits);
+ Known.Zero.setBitsFrom(LowBits);
// TODO: we could bound KnownOne using the lower bound on the number
// of bits which might be set provided by popcnt KnownOne2.
break;
}
case Intrinsic::x86_sse42_crc32_64_64:
- KnownZero.setBitsFrom(32);
+ Known.Zero.setBitsFrom(32);
break;
}
}
@@ -1447,7 +1434,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// tracking the specific element. But at least we might find information
// valid for all elements of the vector (for example if vector is sign
// extended, shifted, etc).
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
break;
case Instruction::ExtractValue:
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
@@ -1459,20 +1446,19 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
computeKnownBitsAddSub(true, II->getArgOperand(0),
- II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, Depth, Q);
+ II->getArgOperand(1), false, Known, Known2,
+ Depth, Q);
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
computeKnownBitsAddSub(false, II->getArgOperand(0),
- II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, Depth, Q);
+ II->getArgOperand(1), false, Known, Known2,
+ Depth, Q);
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
- KnownZero, KnownOne, KnownZero2, KnownOne2, Depth,
- Q);
+ Known, Known2, Depth, Q);
break;
}
}
@@ -1481,7 +1467,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
}
/// Determine which bits of V are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bit sets.
+/// them in the Known bit set.
///
/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
/// we cannot optimize based on the assumption that it is zero without changing
@@ -1495,11 +1481,11 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
/// where V is a vector, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
- unsigned Depth, const Query &Q) {
+void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
+ const Query &Q) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
- unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned BitWidth = Known.getBitWidth();
assert((V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarType()->isPointerTy()) &&
@@ -1507,22 +1493,20 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
(!V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "V, KnownOne and KnownZero should have same BitWidth");
+ "V and Known should have same BitWidth");
(void)BitWidth;
const APInt *C;
if (match(V, m_APInt(C))) {
// We know all of the bits for a scalar constant or a splat vector constant!
- KnownOne = *C;
- KnownZero = ~KnownOne;
+ Known.One = *C;
+ Known.Zero = ~Known.One;
return;
}
// Null and aggregate-zero are all-zeros.
if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
- KnownOne.clearAllBits();
- KnownZero.setAllBits();
+ Known.One.clearAllBits();
+ Known.Zero.setAllBits();
return;
}
// Handle a constant vector by taking the intersection of the known bits of
@@ -1530,12 +1514,12 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
// We know that CDS must be a vector of integers. Take the intersection of
// each element.
- KnownZero.setAllBits(); KnownOne.setAllBits();
- APInt Elt(KnownZero.getBitWidth(), 0);
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ APInt Elt(BitWidth, 0);
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
Elt = CDS->getElementAsInteger(i);
- KnownZero &= ~Elt;
- KnownOne &= Elt;
+ Known.Zero &= ~Elt;
+ Known.One &= Elt;
}
return;
}
@@ -1543,25 +1527,25 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
if (const auto *CV = dyn_cast<ConstantVector>(V)) {
// We know that CV must be a vector of integers. Take the intersection of
// each element.
- KnownZero.setAllBits(); KnownOne.setAllBits();
- APInt Elt(KnownZero.getBitWidth(), 0);
+ Known.Zero.setAllBits(); Known.One.setAllBits();
+ APInt Elt(BitWidth, 0);
for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
Constant *Element = CV->getAggregateElement(i);
auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
if (!ElementCI) {
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
+ Known.Zero.clearAllBits();
+ Known.One.clearAllBits();
return;
}
Elt = ElementCI->getValue();
- KnownZero &= ~Elt;
- KnownOne &= Elt;
+ Known.Zero &= ~Elt;
+ Known.One &= Elt;
}
return;
}
// Start out not knowing anything.
- KnownZero.clearAllBits(); KnownOne.clearAllBits();
+ Known.Zero.clearAllBits(); Known.One.clearAllBits();
// We can't imply anything about undefs.
if (isa<UndefValue>(V))
@@ -1580,27 +1564,27 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
// the bits of its aliasee.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (!GA->isInterposable())
- computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
return;
}
if (const Operator *I = dyn_cast<Operator>(V))
- computeKnownBitsFromOperator(I, KnownZero, KnownOne, Depth, Q);
+ computeKnownBitsFromOperator(I, Known, Depth, Q);
- // Aligned pointers have trailing zeros - refine KnownZero set
+ // Aligned pointers have trailing zeros - refine Known.Zero set
if (V->getType()->isPointerTy()) {
unsigned Align = V->getPointerAlignment(Q.DL);
if (Align)
- KnownZero.setLowBits(countTrailingZeros(Align));
+ Known.Zero.setLowBits(countTrailingZeros(Align));
}
- // computeKnownBitsFromAssume strictly refines KnownZero and
- // KnownOne. Therefore, we run them after computeKnownBitsFromOperator.
+ // computeKnownBitsFromAssume strictly refines Known.
+ // Therefore, we run them after computeKnownBitsFromOperator.
// Check whether a nearby assume intrinsic can determine some known bits.
- computeKnownBitsFromAssume(V, KnownZero, KnownOne, Depth, Q);
+ computeKnownBitsFromAssume(V, Known, Depth, Q);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
}
/// Determine whether the sign bit is known to be zero or one.
@@ -1613,11 +1597,10 @@ void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
KnownOne = false;
return;
}
- APInt ZeroBits(BitWidth, 0);
- APInt OneBits(BitWidth, 0);
- computeKnownBits(V, ZeroBits, OneBits, Depth, Q);
- KnownOne = OneBits.isSignBitSet();
- KnownZero = ZeroBits.isSignBitSet();
+ KnownBits Bits(BitWidth);
+ computeKnownBits(V, Bits, Depth, Q);
+ KnownOne = Bits.One.isSignBitSet();
+ KnownZero = Bits.Zero.isSignBitSet();
}
/// Return true if the given value is known to have exactly one
@@ -1689,18 +1672,18 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
return true;
unsigned BitWidth = V->getType()->getScalarSizeInBits();
- APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0);
- computeKnownBits(X, LHSZeroBits, LHSOneBits, Depth, Q);
+ KnownBits LHSBits(BitWidth);
+ computeKnownBits(X, LHSBits, Depth, Q);
- APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0);
- computeKnownBits(Y, RHSZeroBits, RHSOneBits, Depth, Q);
+ KnownBits RHSBits(BitWidth);
+ computeKnownBits(Y, RHSBits, Depth, Q);
// If i8 V is a power of two or zero:
// ZeroBits: 1 1 1 0 1 1 1 1
// ~ZeroBits: 0 0 0 1 0 0 0 0
- if ((~(LHSZeroBits & RHSZeroBits)).isPowerOf2())
+ if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
// If OrZero isn't set, we cannot give back a zero result.
// Make sure either the LHS or RHS has a bit set.
- if (OrZero || RHSOneBits.getBoolValue() || LHSOneBits.getBoolValue())
+ if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
return true;
}
}
@@ -1871,10 +1854,9 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (BO->hasNoUnsignedWrap())
return isKnownNonZero(X, Depth, Q);
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(X, KnownZero, KnownOne, Depth, Q);
- if (KnownOne[0])
+ KnownBits Known(BitWidth);
+ computeKnownBits(X, Known, Depth, Q);
+ if (Known.One[0])
return true;
}
// shr X, Y != 0 if X is negative. Note that the value of the shift is not
@@ -1894,16 +1876,15 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
// out are known to be zero, and X is known non-zero then at least one
// non-zero bit must remain.
if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(X, KnownZero, KnownOne, Depth, Q);
+ KnownBits Known(BitWidth);
+ computeKnownBits(X, Known, Depth, Q);
auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
// Is there a known one in the portion not shifted out?
- if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal)
+ if (Known.One.countLeadingZeros() < BitWidth - ShiftVal)
return true;
// Are all the bits to be shifted out known zero?
- if (KnownZero.countTrailingOnes() >= ShiftVal)
+ if (Known.Zero.countTrailingOnes() >= ShiftVal)
return isKnownNonZero(X, Depth, Q);
}
}
@@ -1927,18 +1908,17 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
// If X and Y are both negative (as signed values) then their sum is not
// zero unless both X and Y equal INT_MIN.
if (BitWidth && XKnownNegative && YKnownNegative) {
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
+ KnownBits Known(BitWidth);
APInt Mask = APInt::getSignedMaxValue(BitWidth);
// The sign bit of X is set. If some other bit is set then X is not equal
// to INT_MIN.
- computeKnownBits(X, KnownZero, KnownOne, Depth, Q);
- if ((KnownOne & Mask) != 0)
+ computeKnownBits(X, Known, Depth, Q);
+ if (Known.One.intersects(Mask))
return true;
// The sign bit of Y is set. If some other bit is set then Y is not equal
// to INT_MIN.
- computeKnownBits(Y, KnownZero, KnownOne, Depth, Q);
- if ((KnownOne & Mask) != 0)
+ computeKnownBits(Y, Known, Depth, Q);
+ if (Known.One.intersects(Mask))
return true;
}
@@ -1993,10 +1973,9 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
}
if (!BitWidth) return false;
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, Depth, Q);
- return KnownOne != 0;
+ KnownBits Known(BitWidth);
+ computeKnownBits(V, Known, Depth, Q);
+ return Known.One != 0;
}
/// Return true if V2 == V1 + X, where X is known non-zero.
@@ -2028,14 +2007,13 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
// Are any known bits in V1 contradictory to known bits in V2? If V1
// has a known zero where V2 has a known one, they must not be equal.
auto BitWidth = Ty->getBitWidth();
- APInt KnownZero1(BitWidth, 0);
- APInt KnownOne1(BitWidth, 0);
- computeKnownBits(V1, KnownZero1, KnownOne1, 0, Q);
- APInt KnownZero2(BitWidth, 0);
- APInt KnownOne2(BitWidth, 0);
- computeKnownBits(V2, KnownZero2, KnownOne2, 0, Q);
-
- auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1);
+ KnownBits Known1(BitWidth);
+ computeKnownBits(V1, Known1, 0, Q);
+ KnownBits Known2(BitWidth);
+ computeKnownBits(V2, Known2, 0, Q);
+
+ APInt OppositeBits = (Known1.Zero & Known2.One) |
+ (Known2.Zero & Known1.One);
if (OppositeBits.getBoolValue())
return true;
}
@@ -2053,9 +2031,9 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
/// for all of the elements in the vector.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
const Query &Q) {
- APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- computeKnownBits(V, KnownZero, KnownOne, Depth, Q);
- return (KnownZero & Mask) == Mask;
+ KnownBits Known(Mask.getBitWidth());
+ computeKnownBits(V, Known, Depth, Q);
+ return Mask.isSubsetOf(Known.Zero);
}
/// For vector constants, loop over the elements and find the constant with the
@@ -2233,17 +2211,17 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// Special case decrementing a value (ADD X, -1):
if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
if (CRHS->isAllOnesValue()) {
- APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
+ KnownBits Known(TyBits);
+ computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnesValue())
return TyBits;
// If we are subtracting one from a positive number, there is no carry
// out of the result.
- if (KnownZero.isSignBitSet())
+ if (Known.Zero.isSignBitSet())
return Tmp;
}
@@ -2258,16 +2236,16 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// Handle NEG.
if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
if (CLHS->isNullValue()) {
- APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(U->getOperand(1), KnownZero, KnownOne, Depth + 1, Q);
+ KnownBits Known(TyBits);
+ computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnesValue())
return TyBits;
// If the input is known to be positive (the sign bit is known clear),
// the output of the NEG has the same number of sign bits as the input.
- if (KnownZero.isSignBitSet())
+ if (Known.Zero.isSignBitSet())
return Tmp2;
// Otherwise, we treat this like a SUB.
@@ -2319,16 +2297,16 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
if (unsigned VecSignBits = computeNumSignBitsVectorConstant(V, TyBits))
return VecSignBits;
- APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- computeKnownBits(V, KnownZero, KnownOne, Depth, Q);
+ KnownBits Known(TyBits);
+ computeKnownBits(V, Known, Depth, Q);
// If we know that the sign bit is either zero or one, determine the number of
// identical bits in the top of the input value.
- if (KnownZero.isSignBitSet())
- return std::max(FirstAnswer, KnownZero.countLeadingOnes());
+ if (Known.Zero.isSignBitSet())
+ return std::max(FirstAnswer, Known.Zero.countLeadingOnes());
- if (KnownOne.isSignBitSet())
- return std::max(FirstAnswer, KnownOne.countLeadingOnes());
+ if (Known.One.isSignBitSet())
+ return std::max(FirstAnswer, Known.One.countLeadingOnes());
// computeKnownBits gave us no extra information about the top bits.
return FirstAnswer;
@@ -3534,26 +3512,22 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
// we can guarantee that the result does not overflow.
// Ref: "Hacker's Delight" by Henry Warren
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
- APInt LHSKnownZero(BitWidth, 0);
- APInt LHSKnownOne(BitWidth, 0);
- APInt RHSKnownZero(BitWidth, 0);
- APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, DL, /*Depth=*/0, AC, CxtI,
- DT);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, DL, /*Depth=*/0, AC, CxtI,
- DT);
+ KnownBits LHSKnown(BitWidth);
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(LHS, LHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
+ computeKnownBits(RHS, RHSKnown, DL, /*Depth=*/0, AC, CxtI, DT);
// Note that underestimating the number of zero bits gives a more
// conservative answer.
- unsigned ZeroBits = LHSKnownZero.countLeadingOnes() +
- RHSKnownZero.countLeadingOnes();
+ unsigned ZeroBits = LHSKnown.Zero.countLeadingOnes() +
+ RHSKnown.Zero.countLeadingOnes();
// First handle the easy case: if we have enough zero bits there's
// definitely no overflow.
if (ZeroBits >= BitWidth)
return OverflowResult::NeverOverflows;
// Get the largest possible values for each operand.
- APInt LHSMax = ~LHSKnownZero;
- APInt RHSMax = ~RHSKnownZero;
+ APInt LHSMax = ~LHSKnown.Zero;
+ APInt RHSMax = ~RHSKnown.Zero;
// We know the multiply operation doesn't overflow if the maximum values for
// each operand will not overflow after we multiply them together.
@@ -3565,7 +3539,7 @@ OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
// We know it always overflows if multiplying the smallest possible values for
// the operands also results in overflow.
bool MinOverflow;
- (void)LHSKnownOne.umul_ov(RHSKnownOne, MinOverflow);
+ (void)LHSKnown.One.umul_ov(RHSKnown.One, MinOverflow);
if (MinOverflow)
return OverflowResult::AlwaysOverflows;
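The unsigned-multiply overflow check above rests on a leading-zero count: if the operands' known leading zeros sum to at least the bit width, the product cannot wrap; otherwise the code bounds the product using the largest possible operands (all unknown bits set, i.e. ~Known.Zero). A worked 8-bit toy of the first rule follows; it is a standalone illustration, not the LLVM routine.

#include <cstdint>
#include <cstdio>

// Count how many of the top bits of an 8-bit known-zero mask are set,
// i.e. how many leading zeros the value is guaranteed to have.
static unsigned leadingOnes8(uint8_t Mask) {
  unsigned N = 0;
  for (int Bit = 7; Bit >= 0 && (Mask & (1u << Bit)); --Bit)
    ++N;
  return N;
}

int main() {
  uint8_t LHSKnownZero = 0xF0, RHSKnownZero = 0xF0; // both high nibbles known zero
  unsigned ZeroBits = leadingOnes8(LHSKnownZero) + leadingOnes8(RHSKnownZero); // 4 + 4

  if (ZeroBits >= 8) {
    // LHS <= 0x0F and RHS <= 0x0F, so LHS * RHS <= 0xE1 < 0x100: never overflows.
    std::printf("never overflows (ZeroBits=%u)\n", ZeroBits);
  } else {
    // Otherwise bound the product with the largest possible operands,
    // as the code above does with LHSMax = ~LHSKnown.Zero.
    unsigned LHSMax = (uint8_t)~LHSKnownZero, RHSMax = (uint8_t)~RHSKnownZero;
    std::printf("may overflow: %d\n", LHSMax * RHSMax > 0xFFu);
  }
  return 0;
}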
@@ -4284,11 +4258,10 @@ static bool isTruePredicate(CmpInst::Predicate Pred,
// If X & C == 0 then (X | C) == X +_{nuw} C
if (match(A, m_Or(m_Value(X), m_APInt(CA))) &&
match(B, m_Or(m_Specific(X), m_APInt(CB)))) {
- unsigned BitWidth = CA->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT);
+ KnownBits Known(CA->getBitWidth());
+ computeKnownBits(X, Known, DL, Depth + 1, AC, CxtI, DT);
- if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB)
+ if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero))
return true;
}
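The ValueTracking changes above are mechanical in one direction: each KnownZero/KnownOne pair becomes a single KnownBits value, and mask idioms are replaced by named predicates, e.g. (KnownZero & KnownOne) != 0 becomes Known.Zero.intersects(Known.One) and (KnownZero & Mask) == Mask becomes Mask.isSubsetOf(Known.Zero). A minimal standalone sketch of those equivalences with 64-bit masks is shown below; it is a toy stand-in, not the LLVM APInt/KnownBits headers.

#include <cassert>
#include <cstdint>

// Toy stand-ins for the two APInt predicates the patch switches to.
static bool intersects(uint64_t A, uint64_t B) { return (A & B) != 0; }
static bool isSubsetOf(uint64_t A, uint64_t B) { return (A & ~B) == 0; } // all set bits of A are set in B

struct MiniKnown {
  uint64_t Zero = 0; // bits known to be 0
  uint64_t One = 0;  // bits known to be 1
};

int main() {
  MiniKnown Known;
  Known.Zero = 0xF0; // high nibble known zero
  Known.One = 0x01;  // lowest bit known one

  // MaskedValueIsZero: old form (KnownZero & Mask) == Mask,
  // new form Mask.isSubsetOf(Known.Zero).
  uint64_t Mask = 0xC0;
  assert(((Known.Zero & Mask) == Mask) == isSubsetOf(Mask, Known.Zero));

  // Conflict test: old form (KnownZero & KnownOne) != 0,
  // new form Known.Zero.intersects(Known.One).
  assert(!intersects(Known.Zero, Known.One));
  return 0;
}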
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 1d3cde2f5ddb..e5aba03c8dc1 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -726,54 +726,50 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
}
void ModuleBitcodeWriter::writeAttributeGroupTable() {
- const std::vector<AttributeList> &AttrGrps = VE.getAttributeGroups();
+ const std::vector<ValueEnumerator::IndexAndAttrSet> &AttrGrps =
+ VE.getAttributeGroups();
if (AttrGrps.empty()) return;
Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3);
SmallVector<uint64_t, 64> Record;
- for (unsigned i = 0, e = AttrGrps.size(); i != e; ++i) {
- AttributeList AS = AttrGrps[i];
- for (unsigned i = 0, e = AS.getNumSlots(); i != e; ++i) {
- AttributeList A = AS.getSlotAttributes(i);
-
- Record.push_back(VE.getAttributeGroupID(A));
- Record.push_back(AS.getSlotIndex(i));
-
- for (AttributeList::iterator I = AS.begin(0), E = AS.end(0); I != E;
- ++I) {
- Attribute Attr = *I;
- if (Attr.isEnumAttribute()) {
- Record.push_back(0);
- Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
- } else if (Attr.isIntAttribute()) {
- Record.push_back(1);
- Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
- Record.push_back(Attr.getValueAsInt());
- } else {
- StringRef Kind = Attr.getKindAsString();
- StringRef Val = Attr.getValueAsString();
-
- Record.push_back(Val.empty() ? 3 : 4);
- Record.append(Kind.begin(), Kind.end());
+ for (ValueEnumerator::IndexAndAttrSet Pair : AttrGrps) {
+ unsigned AttrListIndex = Pair.first;
+ AttributeSet AS = Pair.second;
+ Record.push_back(VE.getAttributeGroupID(Pair));
+ Record.push_back(AttrListIndex);
+
+ for (Attribute Attr : AS) {
+ if (Attr.isEnumAttribute()) {
+ Record.push_back(0);
+ Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
+ } else if (Attr.isIntAttribute()) {
+ Record.push_back(1);
+ Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum()));
+ Record.push_back(Attr.getValueAsInt());
+ } else {
+ StringRef Kind = Attr.getKindAsString();
+ StringRef Val = Attr.getValueAsString();
+
+ Record.push_back(Val.empty() ? 3 : 4);
+ Record.append(Kind.begin(), Kind.end());
+ Record.push_back(0);
+ if (!Val.empty()) {
+ Record.append(Val.begin(), Val.end());
Record.push_back(0);
- if (!Val.empty()) {
- Record.append(Val.begin(), Val.end());
- Record.push_back(0);
- }
}
}
-
- Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record);
- Record.clear();
}
+
+ Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record);
+ Record.clear();
}
Stream.ExitBlock();
}
void ModuleBitcodeWriter::writeAttributeTable() {
- const std::vector<AttributeList> &Attrs = VE.getAttributes();
+ const std::vector<AttributeList> &Attrs = VE.getAttributeLists();
if (Attrs.empty()) return;
Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
@@ -782,7 +778,8 @@ void ModuleBitcodeWriter::writeAttributeTable() {
for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
const AttributeList &A = Attrs[i];
for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i)
- Record.push_back(VE.getAttributeGroupID(A.getSlotAttributes(i)));
+ Record.push_back(
+ VE.getAttributeGroupID({A.getSlotIndex(i), A.getSlotAttributes(i)}));
Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
Record.clear();
@@ -1270,7 +1267,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
Vals.push_back(F.getCallingConv());
Vals.push_back(F.isDeclaration());
Vals.push_back(getEncodedLinkage(F));
- Vals.push_back(VE.getAttributeID(F.getAttributes()));
+ Vals.push_back(VE.getAttributeListID(F.getAttributes()));
Vals.push_back(Log2_32(F.getAlignment())+1);
Vals.push_back(F.hasSection() ? SectionMap[F.getSection()] : 0);
Vals.push_back(getEncodedVisibility(F));
@@ -2616,7 +2613,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Code = bitc::FUNC_CODE_INST_INVOKE;
- Vals.push_back(VE.getAttributeID(II->getAttributes()));
+ Vals.push_back(VE.getAttributeListID(II->getAttributes()));
Vals.push_back(II->getCallingConv() | 1 << 13);
Vals.push_back(VE.getValueID(II->getNormalDest()));
Vals.push_back(VE.getValueID(II->getUnwindDest()));
@@ -2808,7 +2805,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Code = bitc::FUNC_CODE_INST_CALL;
- Vals.push_back(VE.getAttributeID(CI.getAttributes()));
+ Vals.push_back(VE.getAttributeListID(CI.getAttributes()));
unsigned Flags = getOptimizationFlags(&I);
Vals.push_back(CI.getCallingConv() << bitc::CALL_CCONV |
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 3800d9abd429..861150766986 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -891,19 +891,19 @@ void ValueEnumerator::EnumerateAttributes(AttributeList PAL) {
if (PAL.isEmpty()) return; // null is always 0.
// Do a lookup.
- unsigned &Entry = AttributeMap[PAL];
+ unsigned &Entry = AttributeListMap[PAL];
if (Entry == 0) {
// Never saw this before, add it.
- Attribute.push_back(PAL);
- Entry = Attribute.size();
+ AttributeLists.push_back(PAL);
+ Entry = AttributeLists.size();
}
// Do lookups for all attribute groups.
for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) {
- AttributeList AS = PAL.getSlotAttributes(i);
- unsigned &Entry = AttributeGroupMap[AS];
+ IndexAndAttrSet Pair = {PAL.getSlotIndex(i), PAL.getSlotAttributes(i)};
+ unsigned &Entry = AttributeGroupMap[Pair];
if (Entry == 0) {
- AttributeGroups.push_back(AS);
+ AttributeGroups.push_back(Pair);
Entry = AttributeGroups.size();
}
}
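EnumerateAttributes now deduplicates attribute groups by a (slot index, attribute set) pair, so the same attribute set on different slots gets distinct 1-based IDs while repeats reuse the existing entry. The sketch below shows that numbering scheme, with std::string standing in for AttributeSet purely to keep the example self-contained.

#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

// 1-based dedup-and-number pattern keyed by (slot index, attributes).
using IndexAndAttrs = std::pair<unsigned, std::string>;

int main() {
  std::map<IndexAndAttrs, unsigned> GroupMap; // pair -> 1-based ID; 0 means "not seen"
  std::vector<IndexAndAttrs> Groups;          // ID - 1 -> pair

  auto enumerateGroup = [&](unsigned SlotIndex, const std::string &Attrs) {
    unsigned &Entry = GroupMap[{SlotIndex, Attrs}];
    if (Entry == 0) {                         // never seen before: append and number it
      Groups.push_back({SlotIndex, Attrs});
      Entry = Groups.size();
    }
    return Entry;
  };

  unsigned A = enumerateGroup(0, "nounwind"); // return-slot group
  unsigned B = enumerateGroup(1, "nounwind"); // same attrs, different slot -> new ID
  unsigned C = enumerateGroup(0, "nounwind"); // duplicate -> same ID as A
  std::printf("%u %u %u\n", A, B, C);         // prints: 1 2 1
  return 0;
}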
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 8a82aab29836..e7ccc8df1e5f 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -48,6 +48,10 @@ public:
// For each value, we remember its Value* and occurrence frequency.
typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
+ /// Attribute groups as encoded in bitcode are almost AttributeSets, but they
+ /// include the AttributeList index, so we have to track that in our map.
+ typedef std::pair<unsigned, AttributeSet> IndexAndAttrSet;
+
UseListOrderStack UseListOrders;
private:
@@ -102,13 +106,13 @@ private:
bool ShouldPreserveUseListOrder;
- typedef DenseMap<AttributeList, unsigned> AttributeGroupMapType;
+ typedef DenseMap<IndexAndAttrSet, unsigned> AttributeGroupMapType;
AttributeGroupMapType AttributeGroupMap;
- std::vector<AttributeList> AttributeGroups;
+ std::vector<IndexAndAttrSet> AttributeGroups;
- typedef DenseMap<AttributeList, unsigned> AttributeMapType;
- AttributeMapType AttributeMap;
- std::vector<AttributeList> Attribute;
+ typedef DenseMap<AttributeList, unsigned> AttributeListMapType;
+ AttributeListMapType AttributeListMap;
+ std::vector<AttributeList> AttributeLists;
/// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by
/// the "getGlobalBasicBlockID" method.
@@ -166,16 +170,17 @@ public:
unsigned getInstructionID(const Instruction *I) const;
void setInstructionID(const Instruction *I);
- unsigned getAttributeID(AttributeList PAL) const {
+ unsigned getAttributeListID(AttributeList PAL) const {
if (PAL.isEmpty()) return 0; // Null maps to zero.
- AttributeMapType::const_iterator I = AttributeMap.find(PAL);
- assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!");
+ AttributeListMapType::const_iterator I = AttributeListMap.find(PAL);
+ assert(I != AttributeListMap.end() && "Attribute not in ValueEnumerator!");
return I->second;
}
- unsigned getAttributeGroupID(AttributeList PAL) const {
- if (PAL.isEmpty()) return 0; // Null maps to zero.
- AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(PAL);
+ unsigned getAttributeGroupID(IndexAndAttrSet Group) const {
+ if (!Group.second.hasAttributes())
+ return 0; // Null maps to zero.
+ AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(Group);
assert(I != AttributeGroupMap.end() && "Attribute not in ValueEnumerator!");
return I->second;
}
@@ -206,8 +211,8 @@ public:
const std::vector<const BasicBlock*> &getBasicBlocks() const {
return BasicBlocks;
}
- const std::vector<AttributeList> &getAttributes() const { return Attribute; }
- const std::vector<AttributeList> &getAttributeGroups() const {
+ const std::vector<AttributeList> &getAttributeLists() const { return AttributeLists; }
+ const std::vector<IndexAndAttrSet> &getAttributeGroups() const {
return AttributeGroups;
}
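
Note on the ValueEnumerator hunks above: attribute groups are now keyed by a (slot index, attribute set) pair instead of by the set alone, while keeping the 1-based numbering where 0 means "no attributes". Below is a minimal standalone sketch of that enumeration pattern, using std::map and std::string in place of LLVM's DenseMap and AttributeSet; all names in it are illustrative, not LLVM API.

#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Stand-in for an attribute set; any comparable type works for the sketch.
using AttrSet = std::string;
// Mirror of the patch's IndexAndAttrSet: the slot index plus its attributes.
using IndexAndAttrSet = std::pair<unsigned, AttrSet>;

struct GroupEnumerator {
  std::map<IndexAndAttrSet, unsigned> GroupMap; // pair -> 1-based ID
  std::vector<IndexAndAttrSet> Groups;          // ID - 1 -> pair

  // Assign an ID on first sight, reuse it afterwards; 0 is reserved for
  // "empty", so real IDs start at 1.
  unsigned enumerate(unsigned SlotIndex, const AttrSet &AS) {
    if (AS.empty())
      return 0;
    unsigned &Entry = GroupMap[{SlotIndex, AS}];
    if (Entry == 0) {
      Groups.push_back({SlotIndex, AS});
      Entry = Groups.size();
    }
    return Entry;
  }
};

int main() {
  GroupEnumerator E;
  // The same attribute text on different slot indices gets distinct IDs.
  assert(E.enumerate(0, "noalias") == 1);
  assert(E.enumerate(1, "noalias") == 2);
  assert(E.enumerate(0, "noalias") == 1); // deduplicated
  assert(E.enumerate(2, "") == 0);        // empty maps to zero
  return 0;
}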
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 955524c2a676..3a57772cc7f5 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -964,10 +964,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// sure to update that as well.
const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()];
if (!SU) continue;
- for (DbgValueVector::iterator DVI = DbgValues.begin(),
- DVE = DbgValues.end(); DVI != DVE; ++DVI)
- if (DVI->second == Q.second.Operand->getParent())
- UpdateDbgValue(*DVI->first, AntiDepReg, NewReg);
+ UpdateDbgValues(DbgValues, Q.second.Operand->getParent(),
+ AntiDepReg, NewReg);
}
// We just went back in time and modified history; the
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index 04f7f419f5ea..d14d93100adb 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -60,6 +60,25 @@ public:
if (MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == OldReg)
MI.getOperand(0).setReg(NewReg);
}
+
+ /// Update all DBG_VALUE instructions that may be affected by the dependency
+ /// breaker's update of ParentMI to use NewReg.
+ void UpdateDbgValues(const DbgValueVector &DbgValues, MachineInstr *ParentMI,
+ unsigned OldReg, unsigned NewReg) {
+ // The following code is dependent on the order in which the DbgValues are
+ // constructed in ScheduleDAGInstrs::buildSchedGraph.
+ MachineInstr *PrevDbgMI = nullptr;
+ for (const auto &DV : make_range(DbgValues.crbegin(), DbgValues.crend())) {
+ MachineInstr *PrevMI = DV.second;
+ if ((PrevMI == ParentMI) || (PrevMI == PrevDbgMI)) {
+ MachineInstr *DbgMI = DV.first;
+ UpdateDbgValue(*DbgMI, OldReg, NewReg);
+ PrevDbgMI = DbgMI;
+ } else if (PrevDbgMI) {
+ break; // If no match and already found a DBG_VALUE, we're done.
+ }
+ }
+ }
};
}
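
Note on the two anti-dependence-breaker hunks: both now call the shared UpdateDbgValues helper, which walks DbgValues backwards and also follows DBG_VALUEs chained to an earlier DBG_VALUE instead of to ParentMI directly. A self-contained sketch of that matching logic over plain structs follows; the types and names are illustrative, not the LLVM ones, and the list is assumed to be stored in reverse program order so that reverse iteration visits entries in program order.

#include <cassert>
#include <utility>
#include <vector>

struct Instr { int Reg; };

// Each entry pairs a debug-value instruction with the instruction it was
// attached to when the list was built (newest entries first in the vector).
using DbgValueVector = std::vector<std::pair<Instr *, Instr *>>;

// Rewrite OldReg to NewReg in every debug value hanging off ParentMI,
// including debug values that were chained onto a previous debug value.
void updateDbgValues(const DbgValueVector &DbgValues, Instr *ParentMI,
                     int OldReg, int NewReg) {
  Instr *PrevDbgMI = nullptr;
  for (auto It = DbgValues.rbegin(); It != DbgValues.rend(); ++It) {
    Instr *PrevMI = It->second;
    if (PrevMI == ParentMI || PrevMI == PrevDbgMI) {
      Instr *DbgMI = It->first;
      if (DbgMI->Reg == OldReg)
        DbgMI->Reg = NewReg;
      PrevDbgMI = DbgMI;
    } else if (PrevDbgMI) {
      break; // Past the contiguous run of matches; nothing left to update.
    }
  }
}

int main() {
  Instr Parent{7}, Dbg1{7}, Dbg2{7}, Other{7}, DbgOther{7};
  // DbgOther belongs to an unrelated instruction; Dbg2 is chained to Dbg1.
  DbgValueVector DVs = {{&DbgOther, &Other}, {&Dbg2, &Dbg1}, {&Dbg1, &Parent}};
  updateDbgValues(DVs, &Parent, 7, 8);
  assert(Dbg1.Reg == 8 && Dbg2.Reg == 8 && DbgOther.Reg == 7);
  return 0;
}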
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 028c79f3ab6d..d99065b1b67a 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -825,41 +825,25 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << Name << ":";
}
OS << V->getName();
-
- const DIExpression *Expr = MI->getDebugExpression();
- auto Fragment = Expr->getFragmentInfo();
- if (Fragment)
- OS << " [fragment offset=" << Fragment->OffsetInBits
- << " size=" << Fragment->SizeInBits << "]";
OS << " <- ";
// The second operand is only an offset if it's an immediate.
- bool Deref = false;
bool MemLoc = MI->getOperand(0).isReg() && MI->getOperand(1).isImm();
int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0;
- for (unsigned i = 0; i < Expr->getNumElements(); ++i) {
- uint64_t Op = Expr->getElement(i);
- if (Op == dwarf::DW_OP_LLVM_fragment) {
- // There can't be any operands after this in a valid expression
- break;
- } else if (Deref) {
- // We currently don't support extra Offsets or derefs after the first
- // one. Bail out early instead of emitting an incorrect comment.
- OS << " [complex expression]";
- AP.OutStreamer->emitRawComment(OS.str());
- return true;
- } else if (Op == dwarf::DW_OP_deref) {
- Deref = true;
- continue;
- }
-
- uint64_t ExtraOffset = Expr->getElement(i++);
- if (Op == dwarf::DW_OP_plus)
- Offset += ExtraOffset;
- else {
- assert(Op == dwarf::DW_OP_minus);
- Offset -= ExtraOffset;
+ const DIExpression *Expr = MI->getDebugExpression();
+ if (Expr->getNumElements()) {
+ OS << '[';
+ bool NeedSep = false;
+ for (auto Op : Expr->expr_ops()) {
+ if (NeedSep)
+ OS << ", ";
+ else
+ NeedSep = true;
+ OS << dwarf::OperationEncodingString(Op.getOp());
+ for (unsigned I = 0; I < Op.getNumArgs(); ++I)
+ OS << ' ' << Op.getArg(I);
}
+ OS << "] ";
}
// Register or immediate value. Register 0 means undef.
@@ -890,7 +874,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering();
Offset += TFI->getFrameIndexReference(*AP.MF,
MI->getOperand(0).getIndex(), Reg);
- Deref = true;
+ MemLoc = true;
}
if (Reg == 0) {
// Suppress offset, it is not meaningful here.
@@ -899,12 +883,12 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
AP.OutStreamer->emitRawComment(OS.str());
return true;
}
- if (MemLoc || Deref)
+ if (MemLoc)
OS << '[';
OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo());
}
- if (MemLoc || Deref)
+ if (MemLoc)
OS << '+' << Offset << ']';
// NOTE: Want this comment at start of line, don't emit with AddComment.
@@ -936,6 +920,16 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
if (needsCFIMoves() == CFI_M_None)
return;
+ // If there is no "real" instruction following this CFI instruction, skip
+ // emitting it; it would be beyond the end of the function's FDE range.
+ auto *MBB = MI.getParent();
+ auto I = std::next(MI.getIterator());
+ while (I != MBB->end() && I->isTransient())
+ ++I;
+ if (I == MBB->instr_end() &&
+ MBB->getReverseIterator() == MBB->getParent()->rbegin())
+ return;
+
const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions();
unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
const MCCFIInstruction &CFI = Instrs[CFIIndex];
@@ -1046,15 +1040,23 @@ void AsmPrinter::EmitFunctionBody() {
// If the function is empty and the object file uses .subsections_via_symbols,
// then we need to emit *something* to the function body to prevent the
// labels from collapsing together. Just emit a noop.
- if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) {
+ // Similarly, don't emit empty functions on Windows either. It can lead to
+ // duplicate entries (two functions with the same RVA) in the Guard CF Table
+ // after linking, causing the kernel not to load the binary:
+ // https://developercommunity.visualstudio.com/content/problem/45366/vc-linker-creates-invalid-dll-with-clang-cl.html
+ // FIXME: Hide this behind some API in e.g. MCAsmInfo or MCTargetStreamer.
+ const Triple &TT = TM.getTargetTriple();
+ if (!HasAnyRealCode && (MAI->hasSubsectionsViaSymbols() ||
+ (TT.isOSWindows() && TT.isOSBinFormatCOFF()))) {
MCInst Noop;
- MF->getSubtarget().getInstrInfo()->getNoopForMachoTarget(Noop);
- OutStreamer->AddComment("avoids zero-length function");
+ MF->getSubtarget().getInstrInfo()->getNoop(Noop);
// Targets can opt-out of emitting the noop here by leaving the opcode
// unspecified.
- if (Noop.getOpcode())
+ if (Noop.getOpcode()) {
+ OutStreamer->AddComment("avoids zero-length function");
OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
+ }
}
const Function *F = MF->getFunction();
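
Note on the emitDebugValueComment rewrite above: instead of special-casing DW_OP_plus and DW_OP_deref, the comment now prints every expression operation with its arguments inside one bracketed list. A standalone sketch of that formatting loop over a plain operation list (the struct and the operator spellings are stand-ins, not the dwarf:: API):

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// A debug-expression operation: an operator name plus its arguments.
struct ExprOp {
  std::string Name;
  std::vector<uint64_t> Args;
};

// Render ops as "[DW_OP_plus 8, DW_OP_deref] ": operations separated by
// ", ", arguments separated by ' ', mirroring the new comment format.
std::string formatExpr(const std::vector<ExprOp> &Ops) {
  if (Ops.empty())
    return "";
  std::ostringstream OS;
  OS << '[';
  bool NeedSep = false;
  for (const ExprOp &Op : Ops) {
    if (NeedSep)
      OS << ", ";
    else
      NeedSep = true;
    OS << Op.Name;
    for (uint64_t A : Op.Args)
      OS << ' ' << A;
  }
  OS << "] ";
  return OS.str();
}

int main() {
  std::cout << formatExpr({{"DW_OP_plus", {8}}, {"DW_OP_deref", {}}}) << "x\n";
  // Prints: [DW_OP_plus 8, DW_OP_deref] x
  return 0;
}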
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 683e622e3d53..a0bf1632dff3 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -144,6 +144,9 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
" we don't have an asm parser for this target\n");
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
+ if (Dialect == InlineAsm::AD_Intel)
+ // We need this flag to be able to parse numbers like "0bH"
+ Parser->setParsingInlineAsm(true);
if (MF) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 31c2b3b5e752..30bfd7c94e68 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -655,20 +655,12 @@ void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
case dwarf::DW_FORM_ref_addr: {
// Get the absolute offset for this DIE within the debug info/types section.
unsigned Addr = Entry->getDebugSectionOffset();
- if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) {
- const DwarfDebug *DD = AP->getDwarfDebug();
- if (DD)
- assert(!DD->useSplitDwarf() &&
- "TODO: dwo files can't have relocations.");
- const DIEUnit *Unit = Entry->getUnit();
- assert(Unit && "CUDie should belong to a CU.");
- MCSection *Section = Unit->getSection();
- if (Section) {
- const MCSymbol *SectionSym = Section->getBeginSymbol();
- AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true);
- return;
- }
+ if (const MCSymbol *SectionSym =
+ Entry->getUnit()->getCrossSectionRelativeBaseAddress()) {
+ AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true);
+ return;
}
+
AP->OutStreamer->EmitIntValue(Addr, SizeOf(AP, Form));
return;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 9a64b4b76b06..20a415150b4d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -28,7 +28,7 @@ class DwarfFile;
class MCSymbol;
class LexicalScope;
-class DwarfCompileUnit : public DwarfUnit {
+class DwarfCompileUnit final : public DwarfUnit {
/// A numeric ID unique among all CUs in the module
unsigned UniqueID;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index d72656bcc58d..6f442f5c3172 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -91,14 +91,6 @@ DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
cl::init(Default));
static cl::opt<DefaultOnOff>
-SplitDwarf("split-dwarf", cl::Hidden,
- cl::desc("Output DWARF5 split debug info."),
- cl::values(clEnumVal(Default, "Default for platform"),
- clEnumVal(Enable, "Enabled"),
- clEnumVal(Disable, "Disabled")),
- cl::init(Default));
-
-static cl::opt<DefaultOnOff>
DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
cl::desc("Generate DWARF pubnames and pubtypes sections"),
cl::values(clEnumVal(Default, "Default for platform"),
@@ -253,11 +245,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
HasAppleExtensionAttributes = tuneForLLDB();
- // Handle split DWARF. Off by default for now.
- if (SplitDwarf == Default)
- HasSplitDwarf = false;
- else
- HasSplitDwarf = SplitDwarf == Enable;
+ // Handle split DWARF.
+ HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty();
// Pubnames/pubtypes on by default for GDB.
if (DwarfPubSections == Default)
@@ -412,7 +401,7 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
if (useSplitDwarf()) {
NewCU.setSkeleton(constructSkeletonCU(NewCU));
NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
- DIUnit->getSplitDebugFilename());
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
}
// LTO with assembly output shares a single line table amongst multiple CUs.
@@ -1885,7 +1874,7 @@ void DwarfDebug::emitDebugMacinfo() {
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
std::unique_ptr<DwarfCompileUnit> NewU) {
NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name,
- U.getCUNode()->getSplitDebugFilename());
+ Asm->TM.Options.MCOptions.SplitDwarfFile);
if (!CompilationDir.empty())
NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index f65dc151f301..ccd326917bfd 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -117,8 +117,9 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
// Otherwise, attempt to find a covering set of sub-register numbers.
// For example, Q0 on ARM is a composition of D0+D1.
unsigned CurPos = 0;
- // The size of the register in bits, assuming 8 bits per byte.
- unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8;
+ // The size of the register in bits.
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(MachineReg);
+ unsigned RegSize = TRI.getRegSizeInBits(*RC);
// Keep track of the bits in the register we already emitted, so we
// can avoid emitting redundant aliasing subregs.
SmallBitVector Coverage(RegSize, false);
@@ -198,8 +199,10 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
unsigned MachineReg,
unsigned FragmentOffsetInBits) {
auto Fragment = ExprCursor.getFragmentInfo();
- if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U))
+ if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U)) {
+ LocationKind = Unknown;
return false;
+ }
bool HasComplexExpression = false;
auto Op = ExprCursor.peek();
@@ -212,6 +215,7 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// operation to multiple DW_OP_pieces.
if (HasComplexExpression && DwarfRegs.size() > 1) {
DwarfRegs.clear();
+ LocationKind = Unknown;
return false;
}
@@ -233,6 +237,7 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
return Op.getOp() == dwarf::DW_OP_stack_value;
})) {
DwarfRegs.clear();
+ LocationKind = Unknown;
return false;
}
@@ -343,7 +348,6 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
emitUnsigned(Op->getArg(0));
break;
case dwarf::DW_OP_stack_value:
- assert(LocationKind == Unknown || LocationKind == Implicit);
LocationKind = Implicit;
break;
case dwarf::DW_OP_swap:
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index bac0c204d04f..16fb20dd7e20 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1595,3 +1595,11 @@ void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {
getCU().addGlobalTypeUnitType(Ty, Context);
}
+
+const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const {
+ if (!Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+ return nullptr;
+ if (isDwoUnit())
+ return nullptr;
+ return getSection()->getBeginSymbol();
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index d626ef920f95..e84df4650882 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -104,8 +104,6 @@ protected:
bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie);
public:
- virtual ~DwarfUnit();
-
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
@@ -289,6 +287,8 @@ public:
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
protected:
+ ~DwarfUnit();
+
/// Create new static data member DIE.
DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT);
@@ -335,9 +335,10 @@ private:
void setIndexTyDie(DIE *D) { IndexTyDie = D; }
virtual bool isDwoUnit() const = 0;
+ const MCSymbol *getCrossSectionRelativeBaseAddress() const override;
};
-class DwarfTypeUnit : public DwarfUnit {
+class DwarfTypeUnit final : public DwarfUnit {
uint64_t TypeSignature;
const DIE *Ty;
DwarfCompileUnit &CU;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index e1eeddf0816c..b2d6652b075e 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -648,10 +648,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// as well.
const SUnit *SU = MISUnitMap[Q->second->getParent()];
if (!SU) continue;
- for (DbgValueVector::iterator DVI = DbgValues.begin(),
- DVE = DbgValues.end(); DVI != DVE; ++DVI)
- if (DVI->second == Q->second->getParent())
- UpdateDbgValue(*DVI->first, AntiDepReg, NewReg);
+ UpdateDbgValues(DbgValues, Q->second->getParent(),
+ AntiDepReg, NewReg);
}
// We just went back in time and modified history; the
diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 26454c1ef00f..cf97c635e79a 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -145,6 +145,8 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
}
}
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
// Now that selection is complete, there are no more generic vregs. Verify
// that the size of the now-constrained vreg is unchanged and that it has a
// register class.
@@ -165,7 +167,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
if (VRegToType.second.isValid() &&
- VRegToType.second.getSizeInBits() > (RC->getSize() * 8)) {
+ VRegToType.second.getSizeInBits() > TRI.getRegSizeInBits(*RC)) {
reportGISelFailure(MF, TPC, MORE, "gisel-select",
"VReg has explicit size different from class size",
*MI);
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 58778077bc0e..ef5818dabe23 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -76,6 +76,12 @@ void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
switch (Opcode) {
+ case TargetOpcode::G_SDIV:
+ assert(Size == 32 && "Unsupported size");
+ return RTLIB::SDIV_I32;
+ case TargetOpcode::G_UDIV:
+ assert(Size == 32 && "Unsupported size");
+ return RTLIB::UDIV_I32;
case TargetOpcode::G_FADD:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
@@ -87,31 +93,43 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
llvm_unreachable("Unknown libcall function");
}
+static LegalizerHelper::LegalizeResult
+simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
+ Type *OpType) {
+ auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
+ const char *Name = TLI.getLibcallName(Libcall);
+ MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
+ CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
+ MachineOperand::CreateES(Name),
+ {MI.getOperand(0).getReg(), OpType},
+ {{MI.getOperand(1).getReg(), OpType},
+ {MI.getOperand(2).getReg(), OpType}});
+ MI.eraseFromParent();
+ return LegalizerHelper::Legalized;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI) {
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
unsigned Size = Ty.getSizeInBits();
+ auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
MIRBuilder.setInstr(MI);
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UDIV: {
+ Type *Ty = Type::getInt32Ty(Ctx);
+ return simpleLibcall(MI, MIRBuilder, Size, Ty);
+ }
case TargetOpcode::G_FADD:
case TargetOpcode::G_FPOW:
case TargetOpcode::G_FREM: {
- auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
- auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
- auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
- auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
- const char *Name = TLI.getLibcallName(Libcall);
- MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
- CLI.lowerCall(
- MIRBuilder, TLI.getLibcallCallingConv(Libcall),
- MachineOperand::CreateES(Name), {MI.getOperand(0).getReg(), Ty},
- {{MI.getOperand(1).getReg(), Ty}, {MI.getOperand(2).getReg(), Ty}});
- MI.eraseFromParent();
- return Legalized;
+ return simpleLibcall(MI, MIRBuilder, Size, Ty);
}
}
}
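
Note on the LegalizerHelper hunks: 32-bit G_SDIV/G_UDIV gain a libcall path, and the shared call-lowering code moves into simpleLibcall so each opcode only has to pick an operand type. A minimal sketch of that dispatch shape with plain enums and strings follows; the routine names are the usual compiler-rt/libgcc spellings used purely as example strings, and nothing here is LLVM API.

#include <cassert>
#include <stdexcept>
#include <string>

enum class Opcode { SDiv, UDiv, FAdd, FRem };

// Map (opcode, size-in-bits) to a runtime-library routine name, in the same
// spirit as getRTLibDesc: integer division only supports 32 bits here,
// floating point supports 32 and 64.
std::string getLibcallName(Opcode Opc, unsigned Size) {
  switch (Opc) {
  case Opcode::SDiv:
    assert(Size == 32 && "Unsupported size");
    return "__divsi3";
  case Opcode::UDiv:
    assert(Size == 32 && "Unsupported size");
    return "__udivsi3";
  case Opcode::FAdd:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? "__adddf3" : "__addsf3";
  case Opcode::FRem:
    assert((Size == 32 || Size == 64) && "Unsupported size");
    return Size == 64 ? "fmod" : "fmodf";
  }
  throw std::logic_error("unknown opcode");
}

// One shared helper builds the call; callers only differ in the operand type,
// which is the factoring simpleLibcall captures in the patch.
std::string lowerToLibcall(Opcode Opc, unsigned Size, const std::string &Ty) {
  return "call " + Ty + " " + getLibcallName(Opc, Size);
}

int main() {
  assert(lowerToLibcall(Opcode::SDiv, 32, "i32") == "call i32 __divsi3");
  assert(lowerToLibcall(Opcode::FRem, 64, "double") == "call double fmod");
  return 0;
}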
diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp
index 940957d02152..83b21e637097 100644
--- a/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -48,7 +48,7 @@ bool RegisterBank::verify(const TargetRegisterInfo &TRI) const {
// Verify that the Size of the register bank is big enough to cover
// all the register classes it covers.
- assert((getSize() >= SubRC.getSize() * 8) &&
+ assert(getSize() >= TRI.getRegSizeInBits(SubRC) &&
"Size is not big enough for all the subclasses!");
assert(covers(SubRC) && "Not all subclasses are covered");
}
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index b2df2f159676..d5ae9a6776a4 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -421,7 +421,7 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
RC = MRI.getRegClass(Reg);
}
assert(RC && "Unable to deduce the register class");
- return RC->getSize() * 8;
+ return TRI.getRegSizeInBits(*RC);
}
//------------------------------------------------------------------------------
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index 6da174a53666..b6624b88fe23 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -925,9 +925,6 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
<< CmpInst::getPredicateName(Pred) << ')';
break;
}
- case MachineOperand::MO_Placeholder:
- OS << "<placeholder>";
- break;
}
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 4bd5fbfe38e6..1faf6292a9c1 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -287,8 +287,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return getIntrinsicID() == Other.getIntrinsicID();
case MachineOperand::MO_Predicate:
return getPredicate() == Other.getPredicate();
- case MachineOperand::MO_Placeholder:
- return true;
}
llvm_unreachable("Invalid machine operand type");
}
@@ -337,8 +335,6 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
case MachineOperand::MO_Predicate:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate());
- case MachineOperand::MO_Placeholder:
- return hash_combine();
}
llvm_unreachable("Invalid machine operand type");
}
@@ -515,9 +511,6 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
<< CmpInst::getPredicateName(Pred) << '>';
break;
}
- case MachineOperand::MO_Placeholder:
- OS << "<placeholder>";
- break;
}
if (unsigned TF = getTargetFlags())
OS << "[TF=" << TF << ']';
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index b3d18435985e..7eb991744f01 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -330,7 +330,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
/// Return true if instruction stores to the specified frame.
static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
// If we lost memory operands, conservatively assume that the instruction
- // writes to all slots.
+ // writes to all slots.
if (MI->memoperands_empty())
return true;
for (const MachineMemOperand *MemOp : MI->memoperands()) {
@@ -708,7 +708,7 @@ void MachineLICM::SinkIntoLoop() {
for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
I != Preheader->instr_end(); ++I) {
// We need to ensure that we can safely move this instruction into the loop.
- // As such, it must not have side-effects, e.g. such as a call has.
+ // As such, it must not have side-effects, e.g. such as a call has.
if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
Candidates.push_back(&*I);
}
@@ -837,9 +837,9 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
/// constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
assert (MI.mayLoad() && "Expected MI that loads!");
-
+
// If we lost memory operands, conservatively assume that the instruction
- // reads from everything..
+ // reads from everything..
if (MI.memoperands_empty())
return true;
@@ -1337,7 +1337,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
// Since we are moving the instruction out of its basic block, we do not
- // retain its debug location. Doing so would degrade the debugging
+ // retain its debug location. Doing so would degrade the debugging
// experience and adversely affect the accuracy of profiling information.
MI->setDebugLoc(DebugLoc());
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 1354009794cb..570a0cd0ba90 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -373,22 +373,22 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
FixedSlot->Reg != Reg)
++FixedSlot;
+ unsigned Size = RegInfo->getSpillSize(*RC);
if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
// Nope, just spill it anywhere convenient.
- unsigned Align = RC->getAlignment();
+ unsigned Align = RegInfo->getSpillAlignment(*RC);
unsigned StackAlign = TFI->getStackAlignment();
// We may not be able to satisfy the desired alignment specification of
// the TargetRegisterClass if the stack alignment is smaller. Use the
// min.
Align = std::min(Align, StackAlign);
- FrameIdx = MFI.CreateStackObject(RC->getSize(), Align, true);
+ FrameIdx = MFI.CreateStackObject(Size, Align, true);
if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
} else {
// Spill it to the stack where we must.
- FrameIdx =
- MFI.CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+ FrameIdx = MFI.CreateFixedSpillStackObject(Size, FixedSlot->Offset);
}
CS.setFrameIdx(FrameIdx);
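
Note on the spill-slot hunks in this import (here and in RegAllocFast/RegisterScavenging below): frame-object size and alignment now come from the register info's spill size/alignment rather than from TargetRegisterClass::getSize()/getAlignment(), and the requested alignment is still clamped to the stack alignment. A small sketch of that clamping decision, independent of the LLVM types (field names below are illustrative):

#include <algorithm>
#include <cassert>

// Properties the register info would report for a register class.
struct SpillInfo {
  unsigned SizeInBytes;
  unsigned AlignInBytes;
};

// Pick the alignment actually used for the spill slot: the class may ask for
// more alignment than the stack provides, in which case take the minimum.
unsigned chooseSpillAlignment(const SpillInfo &SI, unsigned StackAlign) {
  return std::min(SI.AlignInBytes, StackAlign);
}

int main() {
  SpillInfo Vec128{16, 16};
  assert(chooseSpillAlignment(Vec128, 8) == 8);   // stack is less aligned
  assert(chooseSpillAlignment(Vec128, 32) == 16); // class limit wins
  return 0;
}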
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 283d84629f8e..c606b7b83310 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -212,8 +212,9 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
return SS; // Already has space allocated?
// Allocate a new stack object for this spill location...
- int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
+ unsigned Size = TRI->getSpillSize(*RC);
+ unsigned Align = TRI->getSpillAlignment(*RC);
+ int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(Size, Align);
// Assign the slot.
StackSlotForVirtReg[VirtReg] = FrameIdx;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 6392136fa290..35db30f89976 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -395,8 +395,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// Find an available scavenging slot with size and alignment matching
// the requirements of the class RC.
const MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned NeedSize = RC->getSize();
- unsigned NeedAlign = RC->getAlignment();
+ unsigned NeedSize = TRI->getSpillSize(*RC);
+ unsigned NeedAlign = TRI->getSpillAlignment(*RC);
unsigned SI = Scavenged.size(), Diff = std::numeric_limits<unsigned>::max();
int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd();
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4702d63cb617..1251ae6262b8 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3878,27 +3878,29 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
return false;
+ SDValue N0 = N.getOperand(0);
+ unsigned Opc0 = N0.getOpcode();
+
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!N1C)
return false;
- unsigned Num;
+ unsigned MaskByteOffset;
switch (N1C->getZExtValue()) {
default:
return false;
- case 0xFF: Num = 0; break;
- case 0xFF00: Num = 1; break;
- case 0xFF0000: Num = 2; break;
- case 0xFF000000: Num = 3; break;
+ case 0xFF: MaskByteOffset = 0; break;
+ case 0xFF00: MaskByteOffset = 1; break;
+ case 0xFF0000: MaskByteOffset = 2; break;
+ case 0xFF000000: MaskByteOffset = 3; break;
}
// Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
- SDValue N0 = N.getOperand(0);
if (Opc == ISD::AND) {
- if (Num == 0 || Num == 2) {
+ if (MaskByteOffset == 0 || MaskByteOffset == 2) {
// (x >> 8) & 0xff
// (x >> 8) & 0xff0000
- if (N0.getOpcode() != ISD::SRL)
+ if (Opc0 != ISD::SRL)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C || C->getZExtValue() != 8)
@@ -3906,7 +3908,7 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
} else {
// (x << 8) & 0xff00
// (x << 8) & 0xff000000
- if (N0.getOpcode() != ISD::SHL)
+ if (Opc0 != ISD::SHL)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!C || C->getZExtValue() != 8)
@@ -3915,7 +3917,7 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
} else if (Opc == ISD::SHL) {
// (x & 0xff) << 8
// (x & 0xff0000) << 8
- if (Num != 0 && Num != 2)
+ if (MaskByteOffset != 0 && MaskByteOffset != 2)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!C || C->getZExtValue() != 8)
@@ -3923,17 +3925,17 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
} else { // Opc == ISD::SRL
// (x & 0xff00) >> 8
// (x & 0xff000000) >> 8
- if (Num != 1 && Num != 3)
+ if (MaskByteOffset != 1 && MaskByteOffset != 3)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!C || C->getZExtValue() != 8)
return false;
}
- if (Parts[Num])
+ if (Parts[MaskByteOffset])
return false;
- Parts[Num] = N0.getOperand(0).getNode();
+ Parts[MaskByteOffset] = N0.getOperand(0).getNode();
return true;
}
@@ -4198,20 +4200,22 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// reassociate or
if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
return ROR;
+
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
// iff (c1 & c2) != 0.
- if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
- isa<ConstantSDNode>(N0.getOperand(1))) {
- ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
- if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
- if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
- N1C, C1))
- return DAG.getNode(
- ISD::AND, SDLoc(N), VT,
- DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
- return SDValue();
+ if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) {
+ if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) {
+ if (SDValue COR =
+ DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1))
+ return DAG.getNode(
+ ISD::AND, SDLoc(N), VT,
+ DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
+ return SDValue();
+ }
}
}
+
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode())
if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
@@ -5611,24 +5615,24 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
- N0.getOperand(0).getOpcode() == ISD::SRL &&
- isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
- uint64_t c1 =
- cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- EVT InnerShiftVT = N0.getOperand(0).getValueType();
- EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
- uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
- // This is only valid if the OpSizeInBits + c1 = size of inner shift.
- if (c1 + OpSizeInBits == InnerShiftSize) {
- SDLoc DL(N0);
- if (c1 + c2 >= InnerShiftSize)
- return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(ISD::SRL, DL, InnerShiftVT,
- N0.getOperand(0)->getOperand(0),
- DAG.getConstant(c1 + c2, DL,
- ShiftCountVT)));
+ N0.getOperand(0).getOpcode() == ISD::SRL) {
+ if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
+ uint64_t c1 = N001C->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+ // This is only valid if the OpSizeInBits + c1 = size of inner shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ SDLoc DL(N0);
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(ISD::SRL, DL, InnerShiftVT,
+ N0.getOperand(0).getOperand(0),
+ DAG.getConstant(c1 + c2, DL,
+ ShiftCountVT)));
+ }
}
}
@@ -11641,7 +11645,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
// Check if this is a trunc(lshr).
if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
isa<ConstantSDNode>(User->getOperand(1))) {
- Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
+ Shift = User->getConstantOperandVal(1);
User = *User->use_begin();
}
@@ -13120,8 +13124,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// do this only if indices are both constants and Idx1 < Idx0.
if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
&& isa<ConstantSDNode>(InVec.getOperand(2))) {
- unsigned OtherElt =
- cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
+ unsigned OtherElt = InVec.getConstantOperandVal(2);
if (Elt < OtherElt) {
// Swap nodes.
SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
@@ -14065,7 +14068,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
- int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ int ExtIdx = Op.getConstantOperandVal(1);
// Ensure that we are extracting a subvector from a vector the same
// size as the result.
@@ -15049,7 +15052,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
N1.getValueType() == N0.getOperand(1).getValueType() &&
isa<ConstantSDNode>(N0.getOperand(2))) {
- unsigned OtherIdx = cast<ConstantSDNode>(N0.getOperand(2))->getZExtValue();
+ unsigned OtherIdx = N0.getConstantOperandVal(2);
if (InsIdx < OtherIdx) {
// Swap nodes.
SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
@@ -16088,6 +16091,19 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
if (Op1->isInvariant() && Op0->writeMem())
return false;
+ unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
+ unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
+
+ // Check for BaseIndexOffset matching.
+ BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
+ BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
+ if (BasePtr0.equalBaseIndex(BasePtr1))
+ return !((BasePtr0.Offset + NumBytes0 <= BasePtr1.Offset) ||
+ (BasePtr1.Offset + NumBytes1 <= BasePtr0.Offset));
+
+ // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis
+ // modified to use BaseIndexOffset.
+
// Gather base node and offset information.
SDValue Base0, Base1;
int64_t Offset0, Offset1;
@@ -16099,8 +16115,6 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
Base1, Offset1, GV1, CV1);
// If they have the same base address, then check to see if they overlap.
- unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
- unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1)))
return !((Offset0 + NumBytes0) <= Offset1 ||
(Offset1 + NumBytes1) <= Offset0);
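
Note on the new BaseIndexOffset path in DAGCombiner::isAlias above: once two accesses are known to share a base and index, the check reduces to a one-dimensional interval test, and they may alias unless one byte range ends at or before the other begins. A self-contained sketch of that overlap predicate:

#include <cassert>
#include <cstdint>

// Two memory accesses with the same base may alias unless their byte ranges
// [Offset, Offset + NumBytes) are disjoint.
bool mayOverlap(int64_t Offset0, unsigned NumBytes0, int64_t Offset1,
                unsigned NumBytes1) {
  return !((Offset0 + NumBytes0 <= Offset1) ||
           (Offset1 + NumBytes1 <= Offset0));
}

int main() {
  assert(!mayOverlap(0, 4, 4, 4)); // back-to-back words do not overlap
  assert(mayOverlap(0, 8, 4, 4));  // the second word sits inside the first
  assert(mayOverlap(-4, 8, 0, 2)); // negative offsets work the same way
  return 0;
}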
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index e85d1951e3ae..b235e19aaab2 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -161,7 +161,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (VRBase) {
DstRC = MRI->getRegClass(VRBase);
} else if (UseRC) {
- assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+ assert(TRI->isTypeLegalForClass(*UseRC, VT) &&
+ "Incompatible phys register def and uses!");
DstRC = UseRC;
} else {
DstRC = TLI->getRegClassFor(VT);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 3bae3bf9ab7c..fdebb8bd00db 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3497,11 +3497,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// part.
unsigned LoSize = VT.getSizeInBits();
SDValue HiLHS =
- DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getNode(ISD::SRA, dl, VT, LHS,
DAG.getConstant(LoSize - 1, dl,
TLI.getPointerTy(DAG.getDataLayout())));
SDValue HiRHS =
- DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getNode(ISD::SRA, dl, VT, RHS,
DAG.getConstant(LoSize - 1, dl,
TLI.getPointerTy(DAG.getDataLayout())));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 85068e890756..9ed70c9b4db9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -3251,7 +3251,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
Ops.push_back(Op);
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
+ return DAG.getBuildVector(NOutVT, dl, Ops);
}
@@ -3294,7 +3294,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
Ops.push_back(Op);
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
+ return DAG.getBuildVector(NOutVT, dl, Ops);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
@@ -3342,7 +3342,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
+ return DAG.getBuildVector(NOutVT, dl, Ops);
}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) {
@@ -3445,5 +3445,5 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), NewOps);
+ return DAG.getBuildVector(N->getValueType(0), dl, NewOps);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index c02b8960b36c..aa69e0e2adfc 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -362,8 +362,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
SmallVector<SDValue, 8> Ops;
IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType());
- SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
- makeArrayRef(Ops.data(), NumElts));
+ SDValue Vec =
+ DAG.getBuildVector(NVT, dl, makeArrayRef(Ops.data(), NumElts));
return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
}
@@ -396,10 +396,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
NewElts.push_back(Hi);
}
- SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
- EVT::getVectorVT(*DAG.getContext(),
- NewVT, NewElts.size()),
- NewElts);
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size());
+ SDValue NewVec = DAG.getBuildVector(NewVecVT, dl, NewElts);
// Convert the new vector to the old vector type.
return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
@@ -458,7 +456,7 @@ SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
for (unsigned i = 1; i < NumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 1a7d7b7af5fa..4a3160297d64 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -512,7 +512,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
N->getValueType(0).getScalarType(), Elt);
// Revectorize the result so the types line up with what the uses of this
// expression expect.
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op);
+ return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op);
}
/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
@@ -523,16 +523,16 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
}
-/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
-/// so just return the element, ignoring the index.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
- SDValue Res = GetScalarizedVector(N->getOperand(0));
- if (Res.getValueType() != N->getValueType(0))
- Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0),
- Res);
- return Res;
-}
-
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
+/// so just return the element, ignoring the index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ if (Res.getValueType() != VT)
+ Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
+ return Res;
+}
+
/// If the input condition is a vector that needs to be scalarized, it must be
/// <1 x i1>, so just convert to a normal ISD::SELECT
@@ -2631,7 +2631,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
if (InVT.isVector())
NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
else
- NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
+ NewVec = DAG.getBuildVector(NewInVT, dl, Ops);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
}
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index e923e30e5037..69b76fbe57d2 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1320,6 +1320,18 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
RegAdded, LRegs);
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (MCID.hasOptionalDef()) {
+ // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
+ // This operand can be either a def of CPSR, if the S bit is set; or a use
+ // of %noreg. When the OptionalDef is set to a valid register, we need to
+ // handle it in the same way as an ImplicitDef.
+ for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
+ if (MCID.OpInfo[i].isOptionalDef()) {
+ const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
+ unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
+ }
+ }
if (!MCID.ImplicitDefs)
continue;
for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 523f409e6b2c..439f67f1e155 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
@@ -2868,7 +2869,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// A left-shift of a constant one will have exactly one bit set because
// shifting the bit off the end is undefined.
if (Val.getOpcode() == ISD::SHL) {
- auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0));
+ auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue() == 1)
return true;
}
@@ -2876,7 +2877,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// Similarly, a logical right-shift of a constant sign-bit will have exactly
// one bit set.
if (Val.getOpcode() == ISD::SRL) {
- auto *C = dyn_cast<ConstantSDNode>(Val.getOperand(0));
+ auto *C = isConstOrConstSplat(Val.getOperand(0));
if (C && C->getAPIntValue().isSignMask())
return true;
}
@@ -7539,10 +7540,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
int64_t GVOffset = 0;
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType());
- APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
- llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne,
+ KnownBits Known(PtrWidth);
+ llvm::computeKnownBits(const_cast<GlobalValue *>(GV), Known,
getDataLayout());
- unsigned AlignBits = KnownZero.countTrailingOnes();
+ unsigned AlignBits = Known.Zero.countTrailingOnes();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)
return MinAlign(Align, GVOffset);
@@ -7629,52 +7630,52 @@ Type *ConstantPoolSDNode::getType() const {
return Val.ConstVal->getType();
}
-bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
- APInt &SplatUndef,
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
unsigned &SplatBitSize,
bool &HasAnyUndefs,
unsigned MinSplatBits,
- bool isBigEndian) const {
+ bool IsBigEndian) const {
EVT VT = getValueType(0);
assert(VT.isVector() && "Expected a vector type");
- unsigned sz = VT.getSizeInBits();
- if (MinSplatBits > sz)
+ unsigned VecWidth = VT.getSizeInBits();
+ if (MinSplatBits > VecWidth)
return false;
- SplatValue = APInt(sz, 0);
- SplatUndef = APInt(sz, 0);
+ // FIXME: The widths are based on this node's type, but build vectors can
+ // truncate their operands.
+ SplatValue = APInt(VecWidth, 0);
+ SplatUndef = APInt(VecWidth, 0);
- // Get the bits. Bits with undefined values (when the corresponding element
+ // Get the bits. Bits with undefined values (when the corresponding element
// of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
- // in SplatValue. If any of the values are not constant, give up and return
+ // in SplatValue. If any of the values are not constant, give up and return
// false.
- unsigned int nOps = getNumOperands();
- assert(nOps > 0 && "isConstantSplat has 0-size build vector");
- unsigned EltBitSize = VT.getScalarSizeInBits();
+ unsigned int NumOps = getNumOperands();
+ assert(NumOps > 0 && "isConstantSplat has 0-size build vector");
+ unsigned EltWidth = VT.getScalarSizeInBits();
- for (unsigned j = 0; j < nOps; ++j) {
- unsigned i = isBigEndian ? nOps-1-j : j;
+ for (unsigned j = 0; j < NumOps; ++j) {
+ unsigned i = IsBigEndian ? NumOps - 1 - j : j;
SDValue OpVal = getOperand(i);
- unsigned BitPos = j * EltBitSize;
+ unsigned BitPos = j * EltWidth;
if (OpVal.isUndef())
- SplatUndef.setBits(BitPos, BitPos + EltBitSize);
- else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
- SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltBitSize),
- BitPos);
- else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+ SplatUndef.setBits(BitPos, BitPos + EltWidth);
+ else if (auto *CN = dyn_cast<ConstantSDNode>(OpVal))
+ SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+ else if (auto *CN = dyn_cast<ConstantFPSDNode>(OpVal))
SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos);
else
return false;
}
- // The build_vector is all constants or undefs. Find the smallest element
+ // The build_vector is all constants or undefs. Find the smallest element
// size that splats the vector.
-
HasAnyUndefs = (SplatUndef != 0);
- while (sz > 8) {
- unsigned HalfSize = sz / 2;
+ // FIXME: This does not work for vectors with elements less than 8 bits.
+ while (VecWidth > 8) {
+ unsigned HalfSize = VecWidth / 2;
APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
APInt LowValue = SplatValue.trunc(HalfSize);
APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
@@ -7688,10 +7689,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
SplatValue = HighValue | LowValue;
SplatUndef = HighUndef & LowUndef;
- sz = HalfSize;
+ VecWidth = HalfSize;
}
- SplatBitSize = sz;
+ SplatBitSize = VecWidth;
return true;
}
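
Note on the isConstantSplat cleanup above: the renamed loop still shrinks the candidate splat the same way, repeatedly checking whether the high and low halves of the pattern agree (modulo undef bits) and halving the width while they do. A standalone sketch of that idea on a 64-bit pattern with no undef mask; this is illustrative only, since the real code operates on arbitrary-width APInts and, per the FIXME, does not handle elements narrower than 8 bits.

#include <cassert>
#include <cstdint>

// Find the smallest element size (in bits, down to 8) such that the 64-bit
// pattern is that element repeated; mirrors the halving loop in the patch.
unsigned smallestSplatBits(uint64_t Bits) {
  unsigned Width = 64;
  while (Width > 8) {
    unsigned Half = Width / 2;
    uint64_t Mask = (1ULL << Half) - 1;
    uint64_t Lo = Bits & Mask;
    uint64_t Hi = (Bits >> Half) & Mask;
    if (Lo != Hi)
      break;   // the two halves differ; Width is the answer
    Bits = Lo; // keep shrinking the candidate splat
    Width = Half;
  }
  return Width;
}

int main() {
  assert(smallestSplatBits(0xAAAAAAAAAAAAAAAAULL) == 8);  // 0xAA repeated
  assert(smallestSplatBits(0x1234123412341234ULL) == 16); // 0x1234 repeated
  assert(smallestSplatBits(0x0000000100000001ULL) == 32);
  assert(smallestSplatBits(0x0123456789ABCDEFULL) == 64); // no smaller splat
  return 0;
}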
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2c58953ee908..6a737ed84ea4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -362,11 +362,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
return DAG.getUNDEF(ValueVT);
}
- if (ValueVT.getVectorNumElements() == 1 &&
- ValueVT.getVectorElementType() != PartEVT)
- Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType());
+ EVT ValueSVT = ValueVT.getVectorElementType();
+ if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
+ Val = DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
- return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
+ return DAG.getBuildVector(ValueVT, DL, Val);
}
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
@@ -537,7 +537,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
e = PartVT.getVectorNumElements(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ElementVT));
- Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops);
+ Val = DAG.getBuildVector(PartVT, DL, Ops);
// FIXME: Use CONCAT for 2x -> 4x.
@@ -1088,8 +1088,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (isa<ArrayType>(CDS->getType()))
return DAG.getMergeValues(Ops, getCurSDLoc());
- return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(),
- VT, Ops);
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
@@ -1141,7 +1140,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
}
// Create a BUILD_VECTOR node.
- return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops);
+ return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
// If this is a static alloca, generate it as the frameindex instead of
@@ -3147,7 +3146,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
Ops.push_back(Res);
}
- setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops));
+ setValue(&I, DAG.getBuildVector(VT, DL, Ops));
}
void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
@@ -3969,9 +3968,9 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
SDValue Ops[3];
Ops[0] = getRoot();
Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
- TLI.getPointerTy(DAG.getDataLayout()));
+ TLI.getFenceOperandTy(DAG.getDataLayout()));
Ops[2] = DAG.getConstant(I.getSynchScope(), dl,
- TLI.getPointerTy(DAG.getDataLayout()));
+ TLI.getFenceOperandTy(DAG.getDataLayout()));
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
@@ -4896,11 +4895,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Entry.Node = Src;
Args.push_back(Entry);
-
+
Entry.Ty = I.getArgOperand(2)->getType();
Entry.Node = NumElements;
Args.push_back(Entry);
-
+
Entry.Ty = Type::getInt32Ty(*DAG.getContext());
Entry.Node = ElementSize;
Args.push_back(Entry);
@@ -5183,7 +5182,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue ShOps[2];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps);
+ ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps);
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
@@ -5743,7 +5742,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I,
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::experimental_constrained_fadd:
+ case Intrinsic::experimental_constrained_fadd:
Opcode = ISD::STRICT_FADD;
break;
case Intrinsic::experimental_constrained_fsub:
@@ -6653,12 +6652,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
std::pair<unsigned, const TargetRegisterClass *> PhysReg =
- TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(),
- OpInfo.ConstraintCode,
+ TLI.getRegForInlineAsmConstraint(&TRI, OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
unsigned NumRegs = 1;
@@ -6666,12 +6665,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// If this is a FP input in an integer register (or visa versa) insert a bit
// cast of the input value. More generally, handle any case where the input
// value disagrees with the register class we plan to stick this in.
- if (OpInfo.Type == InlineAsm::isInput &&
- PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+ if (OpInfo.Type == InlineAsm::isInput && PhysReg.second &&
+ !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) {
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types).
- MVT RegVT = *PhysReg.second->vt_begin();
+ MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second);
if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) {
OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
RegVT, OpInfo.CallOperand);
@@ -6699,12 +6698,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
if (unsigned AssignedReg = PhysReg.first) {
const TargetRegisterClass *RC = PhysReg.second;
if (OpInfo.ConstraintVT == MVT::Other)
- ValueVT = *RC->vt_begin();
+ ValueVT = *TRI.legalclasstypes_begin(*RC);
// Get the actual register value type. This is important, because the user
// may have asked for (e.g.) the AX register in i32 type. We need to
// remember that AX is actually i16 to get the right extension.
- RegVT = *RC->vt_begin();
+ RegVT = *TRI.legalclasstypes_begin(*RC);
// This is a explicit reference to a physical register.
Regs.push_back(AssignedReg);
@@ -6730,7 +6729,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// Otherwise, if this was a reference to an LLVM register class, create vregs
// for this reference.
if (const TargetRegisterClass *RC = PhysReg.second) {
- RegVT = *RC->vt_begin();
+ RegVT = *TRI.legalclasstypes_begin(*RC);
if (OpInfo.ConstraintVT == MVT::Other)
ValueVT = RegVT;
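
The substitution in this hunk recurs throughout the patch: value-type queries move off TargetRegisterClass and onto TargetRegisterInfo. A minimal sketch of the two query shapes, under the stated assumption that the helper names (firstLegalType, classSupportsType) are hypothetical and not part of the tree:

#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/Target/TargetRegisterInfo.h"

// First legal value type of a class: previously *RC.vt_begin(); the list is
// now owned by TargetRegisterInfo and terminated by MVT::Other.
static llvm::MVT firstLegalType(const llvm::TargetRegisterInfo &TRI,
                                const llvm::TargetRegisterClass &RC) {
  return *TRI.legalclasstypes_begin(RC);
}

// "Does this class support VT?": previously RC.hasType(VT).
static bool classSupportsType(const llvm::TargetRegisterInfo &TRI,
                              const llvm::TargetRegisterClass &RC,
                              llvm::MVT VT) {
  return TRI.isTypeLegalForClass(RC, VT);
}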
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 93c6738f650d..136dec873cb8 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -342,11 +342,16 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
-bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(
- SDValue Op, const APInt &Demanded) {
+bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ TargetLoweringOpt &TLO) const {
+ SelectionDAG &DAG = TLO.DAG;
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
+ // Do target-specific constant optimization.
+ if (targetShrinkDemandedConstant(Op, Demanded, TLO))
+ return TLO.New.getNode();
+
// FIXME: ISD::SELECT, ISD::SELECT_CC
switch (Opcode) {
default:
@@ -367,7 +372,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(
EVT VT = Op.getValueType();
SDValue NewC = DAG.getConstant(Demanded & C, DL, VT);
SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
- return CombineTo(Op, NewOp);
+ return TLO.CombineTo(Op, NewOp);
}
break;
@@ -380,15 +385,17 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
-bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
- unsigned BitWidth,
- const APInt &Demanded,
- const SDLoc &dl) {
+bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
+ const APInt &Demanded,
+ TargetLoweringOpt &TLO) const {
assert(Op.getNumOperands() == 2 &&
"ShrinkDemandedOp only supports binary operators!");
assert(Op.getNode()->getNumValues() == 1 &&
"ShrinkDemandedOp only supports nodes with one result!");
+ SelectionDAG &DAG = TLO.DAG;
+ SDLoc dl(Op);
+
// Early return, as this function cannot handle vector types.
if (Op.getValueType().isVector())
return false;
@@ -418,23 +425,22 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
bool NeedZext = DemandedSize > SmallVTBits;
SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
dl, Op.getValueType(), X);
- return CombineTo(Op, Z);
+ return TLO.CombineTo(Op, Z);
}
}
return false;
}
bool
-TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
- unsigned OpIdx,
- const APInt &Demanded,
- DAGCombinerInfo &DCI) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
+ const APInt &Demanded,
+ DAGCombinerInfo &DCI,
+ TargetLoweringOpt &TLO) const {
SDValue Op = User->getOperand(OpIdx);
APInt KnownZero, KnownOne;
- if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne,
- *this, 0, true))
+ if (!SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne,
+ TLO, 0, true))
return false;
@@ -446,9 +452,9 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
// with the value 'x', which will give us:
// Old = i32 and x, 0xffffff
// New = x
- if (Old.hasOneUse()) {
+ if (TLO.Old.hasOneUse()) {
// For the one use case, we just commit the change.
- DCI.CommitTargetLoweringOpt(*this);
+ DCI.CommitTargetLoweringOpt(TLO);
return true;
}
@@ -456,17 +462,17 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
// AssumeSingleUse flag is not propagated to recursive calls of
// SimplifyDemandedBits, so the only node with multiple uses that
// it will attempt to combine will be Op.
- assert(Old == Op);
+ assert(TLO.Old == Op);
SmallVector <SDValue, 4> NewOps;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
if (i == OpIdx) {
- NewOps.push_back(New);
+ NewOps.push_back(TLO.New);
continue;
}
NewOps.push_back(User->getOperand(i));
}
- DAG.UpdateNodeOperands(User, NewOps);
+ TLO.DAG.UpdateNodeOperands(User, NewOps);
// Op has less users now, so we may be able to perform additional combines
// with it.
DCI.AddToWorklist(Op.getNode());
@@ -585,7 +591,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
- if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
+ if (ShrinkDemandedConstant(Op, ~LHSZero & NewMask, TLO))
return true;
// Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
@@ -620,10 +626,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if ((NewMask & (KnownZero|KnownZero2)) == NewMask)
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType()));
// If the RHS is a constant, see if we can simplify it.
- if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
+ if (ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask, TLO))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
return true;
// Output known-1 bits are only known if set in both the LHS & RHS.
@@ -654,10 +660,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
return TLO.CombineTo(Op, Op.getOperand(1));
// If the RHS is a constant, see if we can simplify it.
- if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ if (ShrinkDemandedConstant(Op, NewMask, TLO))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
return true;
// Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -682,7 +688,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if ((KnownZero2 & NewMask) == NewMask)
return TLO.CombineTo(Op, Op.getOperand(1));
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
return true;
// If all of the unknown bits are known to be zero on one side or the other
@@ -727,7 +733,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
// If it already has all the bits set, nothing to change
// but don't shrink either!
- } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
+ } else if (ShrinkDemandedConstant(Op, NewMask, TLO)) {
return true;
}
}
@@ -746,7 +752,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ if (ShrinkDemandedConstant(Op, NewMask, TLO))
return true;
// Only known if known in both the LHS and RHS.
@@ -764,7 +770,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
- if (TLO.ShrinkDemandedConstant(Op, NewMask))
+ if (ShrinkDemandedConstant(Op, NewMask, TLO))
return true;
// Only known if known in both the LHS and RHS.
@@ -1284,7 +1290,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
KnownOne2, TLO, Depth+1) ||
// See if the operation should be performed at a smaller bit width.
- TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) {
+ ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) {
const SDNodeFlags *Flags = Op.getNode()->getFlags();
if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
@@ -1358,31 +1364,38 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
return 1;
}
+// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
+// work with truncating build vectors and vectors with elements of less than
+// 8 bits.
bool TargetLowering::isConstTrueVal(const SDNode *N) const {
if (!N)
return false;
- const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
- if (!CN) {
- const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
- if (!BV)
- return false;
-
- // Only interested in constant splats, we don't care about undef
- // elements in identifying boolean constants and getConstantSplatNode
- // returns NULL if all ops are undef;
- CN = BV->getConstantSplatNode();
+ APInt CVal;
+ if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
+ CVal = CN->getAPIntValue();
+ } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ auto *CN = BV->getConstantSplatNode();
if (!CN)
return false;
+
+ // If this is a truncating build vector, truncate the splat value.
+ // Otherwise, we may fail to match the expected values below.
+ unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
+ CVal = CN->getAPIntValue();
+ if (BVEltWidth < CVal.getBitWidth())
+ CVal = CVal.trunc(BVEltWidth);
+ } else {
+ return false;
}
switch (getBooleanContents(N->getValueType(0))) {
case UndefinedBooleanContent:
- return CN->getAPIntValue()[0];
+ return CVal[0];
case ZeroOrOneBooleanContent:
- return CN->isOne();
+ return CVal == 1;
case ZeroOrNegativeOneBooleanContent:
- return CN->isAllOnesValue();
+ return CVal.isAllOnesValue();
}
llvm_unreachable("Invalid boolean contents");
@@ -2535,7 +2548,7 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
for (const TargetRegisterClass *RC : RI->regclasses()) {
// If none of the value types for this register class are valid, we
// can't use it. For example, 64-bit reg classes on 32-bit targets.
- if (!isLegalRC(RC))
+ if (!isLegalRC(*RI, *RC))
continue;
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
@@ -2547,9 +2560,9 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
// If this register class has the requested value type, return it,
// otherwise keep searching and return the first class found
// if no other is found which explicitly has the requested type.
- if (RC->hasType(VT))
+ if (RI->isTypeLegalForClass(*RC, VT))
return S;
- else if (!R.second)
+ if (!R.second)
R = S;
}
}
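
The hunks above turn ShrinkDemandedConstant and ShrinkDemandedOp from TargetLoweringOpt members into const TargetLowering members that receive the TargetLoweringOpt explicitly, so a target override (targetShrinkDemandedConstant) can run first. A sketch of the resulting call shape, assuming the wrapper name shrinkRHSConstant, which is hypothetical:

#include "llvm/ADT/APInt.h"
#include "llvm/Target/TargetLowering.h"

// Caller-side shape after the refactor: the TLO scratch object is threaded
// through as a parameter instead of being the method's 'this'.
static bool shrinkRHSConstant(const llvm::TargetLowering &TLI,
                              llvm::SDValue Op, const llvm::APInt &Demanded,
                              llvm::TargetLowering::TargetLoweringOpt &TLO) {
  // Was: TLO.ShrinkDemandedConstant(Op, Demanded);
  // Now targetShrinkDemandedConstant() is consulted first and may install a
  // replacement node in TLO.New before the generic handling runs.
  return TLI.ShrinkDemandedConstant(Op, Demanded, TLO);
}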
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 1a8ec5bff322..315b059c5ac9 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -161,7 +161,8 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
if (SubRegIdx)
Offset = TRI->getSubRegIdxOffset(SubRegIdx);
- Locs.emplace_back(Location::Register, RC->getSize(), DwarfRegNum, Offset);
+ Locs.emplace_back(Location::Register, TRI->getSpillSize(*RC),
+ DwarfRegNum, Offset);
return ++MOI;
}
@@ -245,7 +246,7 @@ void StackMaps::print(raw_ostream &OS) {
StackMaps::LiveOutReg
StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const {
unsigned DwarfRegNum = getDwarfRegNum(Reg, TRI);
- unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
+ unsigned Size = TRI->getSpillSize(*TRI->getMinimalPhysRegClass(Reg));
return LiveOutReg(Reg, DwarfRegNum, Size);
}
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 711144a34743..14c5adc0d898 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -345,12 +345,12 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
unsigned SubIdx, unsigned &Size,
unsigned &Offset,
const MachineFunction &MF) const {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!SubIdx) {
- Size = RC->getSize();
+ Size = TRI->getSpillSize(*RC);
Offset = 0;
return true;
}
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned BitSize = TRI->getSubRegIdxSize(SubIdx);
// Convert bit size to byte size to be consistent with
// MCRegisterClass::getSize().
@@ -364,10 +364,10 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
Size = BitSize /= 8;
Offset = (unsigned)BitOffset / 8;
- assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
+ assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range");
if (!MF.getDataLayout().isLittleEndian()) {
- Offset = RC->getSize() - (Offset + Size);
+ Offset = TRI->getSpillSize(*RC) - (Offset + Size);
}
return true;
}
@@ -428,8 +428,8 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
return nullptr;
}
-void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
- llvm_unreachable("Not a MachO target");
+void TargetInstrInfo::getNoop(MCInst &NopInst) const {
+ llvm_unreachable("Not implemented");
}
static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 27630a3055cb..e579922bb69e 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1184,12 +1184,11 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
/// isLegalRC - Return true if the value types that can be represented by the
/// specified register class are all legal.
-bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
- for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
- I != E; ++I) {
+bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI,
+ const TargetRegisterClass &RC) const {
+ for (auto I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I)
if (isTypeLegal(*I))
return true;
- }
return false;
}
@@ -1299,9 +1298,9 @@ TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) {
const TargetRegisterClass *SuperRC = TRI->getRegClass(i);
// We want the largest possible spill size.
- if (SuperRC->getSize() <= BestRC->getSize())
+ if (TRI->getSpillSize(*SuperRC) <= TRI->getSpillSize(*BestRC))
continue;
- if (!isLegalRC(SuperRC))
+ if (!isLegalRC(*TRI, *SuperRC))
continue;
BestRC = SuperRC;
}
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index 66cdad278e8d..f6e4c17d514c 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -156,8 +156,8 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const {
// this physreg.
const TargetRegisterClass* BestRC = nullptr;
for (const TargetRegisterClass* RC : regclasses()) {
- if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
- (!BestRC || BestRC->hasSubClass(RC)))
+ if ((VT == MVT::Other || isTypeLegalForClass(*RC, VT)) &&
+ RC->contains(reg) && (!BestRC || BestRC->hasSubClass(RC)))
BestRC = RC;
}
@@ -207,7 +207,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A,
if (unsigned Common = *A++ & *B++) {
const TargetRegisterClass *RC =
TRI->getRegClass(I + countTrailingZeros(Common));
- if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT))
+ if (SVT == MVT::SimpleValueType::Any || TRI->isTypeLegalForClass(*RC, VT))
return RC;
}
return nullptr;
@@ -265,7 +265,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
const TargetRegisterClass *BestRC = nullptr;
unsigned *BestPreA = &PreA;
unsigned *BestPreB = &PreB;
- if (RCA->getSize() < RCB->getSize()) {
+ if (getRegSizeInBits(*RCA) < getRegSizeInBits(*RCB)) {
std::swap(RCA, RCB);
std::swap(SubA, SubB);
std::swap(BestPreA, BestPreB);
@@ -273,7 +273,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
// Also terminate the search once we have found a register class as small as
// RCA.
- unsigned MinSize = RCA->getSize();
+ unsigned MinSize = getRegSizeInBits(*RCA);
for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) {
unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA);
@@ -281,7 +281,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
// Check if a common super-register class exists for this index pair.
const TargetRegisterClass *RC =
firstCommonClass(IA.getMask(), IB.getMask(), this);
- if (!RC || RC->getSize() < MinSize)
+ if (!RC || getRegSizeInBits(*RC) < MinSize)
continue;
// The indexes must compose identically: PreA+SubA == PreB+SubB.
@@ -290,7 +290,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
continue;
// Is RC a better candidate than BestRC?
- if (BestRC && RC->getSize() >= BestRC->getSize())
+ if (BestRC && getRegSizeInBits(*RC) >= getRegSizeInBits(*BestRC))
continue;
// Yes, RC is the smallest super-register seen so far.
@@ -299,7 +299,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
*BestPreB = IB.getSubReg();
// Bail early if we reached MinSize. We won't find a better candidate.
- if (BestRC->getSize() == MinSize)
+ if (getRegSizeInBits(*BestRC) == MinSize)
return BestRC;
}
}
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index c8946010e9d1..d10ca1a7ff91 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -73,8 +73,9 @@ void VirtRegMap::grow() {
}
unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
- int SS = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
+ unsigned Size = TRI->getSpillSize(*RC);
+ unsigned Align = TRI->getSpillAlignment(*RC);
+ int SS = MF->getFrameInfo().CreateSpillStackObject(Size, Align);
++NumSpillSlots;
return SS;
}
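
Spill size and alignment now come from TargetRegisterInfo rather than the register class itself; the same substitution appears in StackMaps, TargetInstrInfo, and TargetLoweringBase above. A sketch of the pattern, with a hypothetical helper name:

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"

// Hypothetical helper mirroring VirtRegMap::createSpillSlot after this change.
static int createSpillSlotFor(llvm::MachineFrameInfo &MFI,
                              const llvm::TargetRegisterInfo &TRI,
                              const llvm::TargetRegisterClass &RC) {
  unsigned Size = TRI.getSpillSize(RC);       // was RC.getSize()
  unsigned Align = TRI.getSpillAlignment(RC); // was RC.getAlignment()
  return MFI.CreateSpillStackObject(Size, Align);
}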
diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 85e1eaedfc61..a12f8adfafe5 100644
--- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -9,6 +9,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/Support/Dwarf.h"
@@ -112,10 +113,8 @@ LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
continue;
}
while (AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) {
- unsigned StringOffset = AccelSection.getU32(&DataOffset);
- RelocAddrMap::const_iterator Reloc = Relocs.find(DataOffset-4);
- if (Reloc != Relocs.end())
- StringOffset += Reloc->second.second;
+ unsigned StringOffset =
+ getRelocatedValue(AccelSection, 4, &DataOffset, &Relocs);
if (!StringOffset)
break;
OS << format(" Name: %08x \"%s\"\n", StringOffset,
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index bbb19b5e998d..7e8d04672c03 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -56,6 +56,16 @@ typedef DWARFDebugLine::LineTable DWARFLineTable;
typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind;
+uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size,
+ uint32_t *Off, const RelocAddrMap *Relocs) {
+ if (!Relocs)
+ return Data.getUnsigned(Off, Size);
+ RelocAddrMap::const_iterator AI = Relocs->find(*Off);
+ if (AI == Relocs->end())
+ return Data.getUnsigned(Off, Size);
+ return Data.getUnsigned(Off, Size) + AI->second.second;
+}
+
static void dumpAccelSection(raw_ostream &OS, StringRef Name,
const DWARFSection& Section, StringRef StringSection,
bool LittleEndian) {
@@ -212,11 +222,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH,
// sizes, but for simplicity we just use the address byte size of the last
// compile unit (there is no easy and fast way to associate address range
// list and the compile unit it describes).
- DataExtractor rangesData(getRangeSection(), isLittleEndian(),
+ DataExtractor rangesData(getRangeSection().Data, isLittleEndian(),
savedAddressByteSize);
offset = 0;
DWARFDebugRangeList rangeList;
- while (rangeList.extract(rangesData, &offset))
+ while (rangeList.extract(rangesData, &offset, getRangeSection().Relocs))
rangeList.dump(OS);
}
@@ -722,7 +732,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
*SectionData = data;
if (name == "debug_ranges") {
// FIXME: Use the other dwo range section when we emit it.
- RangeDWOSection = data;
+ RangeDWOSection.Data = data;
}
} else if (name == "debug_types") {
// Find debug_types data by section rather than name as there are
@@ -763,6 +773,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
.Case("debug_loc", &LocSection.Relocs)
.Case("debug_info.dwo", &InfoDWOSection.Relocs)
.Case("debug_line", &LineSection.Relocs)
+ .Case("debug_ranges", &RangeSection.Relocs)
.Case("apple_names", &AppleNamesSection.Relocs)
.Case("apple_types", &AppleTypesSection.Relocs)
.Case("apple_namespaces", &AppleNamespacesSection.Relocs)
@@ -845,7 +856,7 @@ StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) {
.Case("debug_frame", &DebugFrameSection)
.Case("eh_frame", &EHFrameSection)
.Case("debug_str", &StringSection)
- .Case("debug_ranges", &RangeSection)
+ .Case("debug_ranges", &RangeSection.Data)
.Case("debug_macinfo", &MacinfoSection)
.Case("debug_pubnames", &PubNamesSection)
.Case("debug_pubtypes", &PubTypesSection)
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index e4670519b797..ff6ed9c6741d 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallString.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/Support/Dwarf.h"
@@ -302,16 +303,9 @@ bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
// relocatable address. All of the other statement program opcodes
// that affect the address register add a delta to it. This instruction
// stores a relocatable value into it instead.
- {
- // If this address is in our relocation map, apply the relocation.
- RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr);
- if (AI != RMap->end()) {
- const std::pair<uint8_t, int64_t> &R = AI->second;
- State.Row.Address =
- debug_line_data.getAddress(offset_ptr) + R.second;
- } else
- State.Row.Address = debug_line_data.getAddress(offset_ptr);
- }
+ State.Row.Address =
+ getRelocatedValue(debug_line_data, debug_line_data.getAddressSize(),
+ offset_ptr, RMap);
break;
case DW_LNE_define_file:
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index e2799ab2d243..d5c34216ed53 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/Support/Dwarf.h"
@@ -48,18 +49,10 @@ void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) {
// 2.6.2 Location Lists
// A location list entry consists of:
while (true) {
+ // Beginning and ending address offsets.
Entry E;
- RelocAddrMap::const_iterator AI = RelocMap.find(Offset);
- // 1. A beginning address offset. ...
- E.Begin = data.getUnsigned(&Offset, AddressSize);
- if (AI != RelocMap.end())
- E.Begin += AI->second.second;
-
- AI = RelocMap.find(Offset);
- // 2. An ending address offset. ...
- E.End = data.getUnsigned(&Offset, AddressSize);
- if (AI != RelocMap.end())
- E.End += AI->second.second;
+ E.Begin = getRelocatedValue(data, AddressSize, &Offset, &RelocMap);
+ E.End = getRelocatedValue(data, AddressSize, &Offset, &RelocMap);
// The end of any given location list is marked by an end of list entry,
// which consists of a 0 for the beginning address offset and a 0 for the
diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index f1d82fda8c06..9380fe8fe85d 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -22,7 +23,8 @@ void DWARFDebugRangeList::clear() {
Entries.clear();
}
-bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr) {
+bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr,
+ const RelocAddrMap &Relocs) {
clear();
if (!data.isValidOffset(*offset_ptr))
return false;
@@ -33,8 +35,11 @@ bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr) {
while (true) {
RangeListEntry entry;
uint32_t prev_offset = *offset_ptr;
- entry.StartAddress = data.getAddress(offset_ptr);
- entry.EndAddress = data.getAddress(offset_ptr);
+ entry.StartAddress =
+ getRelocatedValue(data, AddressSize, offset_ptr, &Relocs);
+ entry.EndAddress =
+ getRelocatedValue(data, AddressSize, offset_ptr, &Relocs);
+
// Check that both values were extracted correctly.
if (*offset_ptr != prev_offset + 2 * AddressSize) {
clear();
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 6de57b999adc..28592e4dfb65 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -334,11 +334,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &data,
(Form == DW_FORM_addr)
? U->getAddressByteSize()
: U->getRefAddrByteSize();
- RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr);
- if (AI != U->getRelocMap()->end()) {
- Value.uval = data.getUnsigned(offset_ptr, AddrSize) + AI->second.second;
- } else
- Value.uval = data.getUnsigned(offset_ptr, AddrSize);
+ Value.uval =
+ getRelocatedValue(data, AddrSize, offset_ptr, U->getRelocMap());
break;
}
case DW_FORM_exprloc:
@@ -376,12 +373,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &data,
case DW_FORM_ref_sup4:
case DW_FORM_strx4:
case DW_FORM_addrx4: {
- Value.uval = data.getU32(offset_ptr);
- if (!U)
- break;
- RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr-4);
- if (AI != U->getRelocMap()->end())
- Value.uval += AI->second.second;
+ const RelocAddrMap* RelocMap = U ? U->getRelocMap() : nullptr;
+ Value.uval = getRelocatedValue(data, 4, offset_ptr, RelocMap);
break;
}
case DW_FORM_data8:
@@ -411,11 +404,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &data,
case DW_FORM_strp_sup: {
if (!U)
return false;
- RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr);
- uint8_t Size = U->getDwarfOffsetByteSize();
- Value.uval = data.getUnsigned(offset_ptr, Size);
- if (AI != U->getRelocMap()->end())
- Value.uval += AI->second.second;
+ Value.uval = getRelocatedValue(data, U->getDwarfOffsetByteSize(),
+ offset_ptr, U->getRelocMap());
break;
}
case DW_FORM_flag_present:
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index c3f467745402..f50487fc3ba3 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -32,7 +32,7 @@ using namespace llvm;
using namespace dwarf;
void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
- parseImpl(C, Section, C.getDebugAbbrev(), C.getRangeSection(),
+ parseImpl(C, Section, C.getDebugAbbrev(), &C.getRangeSection(),
C.getStringSection(), StringRef(), C.getAddrSection(),
C.getLineSection().Data, C.isLittleEndian(), false);
}
@@ -40,16 +40,17 @@ void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
const DWARFSection &DWOSection,
DWARFUnitIndex *Index) {
- parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), C.getRangeDWOSection(),
+ parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), &C.getRangeDWOSection(),
C.getStringDWOSection(), C.getStringOffsetDWOSection(),
C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian(),
true);
}
DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
- const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
- StringRef SOS, StringRef AOS, StringRef LS, bool LE,
- bool IsDWO, const DWARFUnitSectionBase &UnitSection,
+ const DWARFDebugAbbrev *DA, const DWARFSection *RS,
+ StringRef SS, StringRef SOS, StringRef AOS, StringRef LS,
+ bool LE, bool IsDWO,
+ const DWARFUnitSectionBase &UnitSection,
const DWARFUnitIndex::Entry *IndexEntry)
: Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS),
LineSection(LS), StringSection(SS), StringOffsetSection([&]() {
@@ -142,9 +143,10 @@ bool DWARFUnit::extractRangeList(uint32_t RangeListOffset,
DWARFDebugRangeList &RangeList) const {
// Require that compile unit is extracted.
assert(!DieArray.empty());
- DataExtractor RangesData(RangeSection, isLittleEndian, AddrSize);
+ DataExtractor RangesData(RangeSection->Data, isLittleEndian, AddrSize);
uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset;
- return RangeList.extract(RangesData, &ActualRangeListOffset);
+ return RangeList.extract(RangesData, &ActualRangeListOffset,
+ RangeSection->Relocs);
}
void DWARFUnit::clear() {
diff --git a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
index 5e8c0bdc171d..4e2474c51cb1 100644
--- a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
@@ -14,6 +14,7 @@
#include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h"
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
@@ -720,7 +721,7 @@ uint32_t DIARawSymbol::getVirtualTableShapeId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualTableShapeId);
}
-std::unique_ptr<PDBSymbolTypeVTable>
+std::unique_ptr<PDBSymbolTypeBuiltin>
DIARawSymbol::getVirtualBaseTableType() const {
CComPtr<IDiaSymbol> TableType;
if (FAILED(Symbol->get_virtualBaseTableType(&TableType)) || !TableType)
@@ -729,7 +730,7 @@ DIARawSymbol::getVirtualBaseTableType() const {
auto RawVT = llvm::make_unique<DIARawSymbol>(Session, TableType);
auto Pointer =
llvm::make_unique<PDBSymbolTypePointer>(Session, std::move(RawVT));
- return unique_dyn_cast<PDBSymbolTypeVTable>(Pointer->getPointeeType());
+ return unique_dyn_cast<PDBSymbolTypeBuiltin>(Pointer->getPointeeType());
}
PDB_DataKind DIARawSymbol::getDataKind() const {
diff --git a/lib/DebugInfo/PDB/DIA/DIASession.cpp b/lib/DebugInfo/PDB/DIA/DIASession.cpp
index 6ecf335812b5..ef47b92b4f2f 100644
--- a/lib/DebugInfo/PDB/DIA/DIASession.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIASession.cpp
@@ -21,12 +21,22 @@
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::pdb;
-static Error ErrorFromHResult(HRESULT Result, StringRef Context) {
+template <typename... Ts>
+static Error ErrorFromHResult(HRESULT Result, const char *Str, Ts &&... Args) {
+ SmallString<64> MessageStorage;
+ StringRef Context;
+ if (sizeof...(Args) > 0) {
+ MessageStorage = formatv(Str, std::forward<Ts>(Args)...).str();
+ Context = MessageStorage;
+ } else
+ Context = Str;
+
switch (Result) {
case E_PDB_NOT_FOUND:
return make_error<GenericError>(generic_error_code::invalid_path, Context);
@@ -95,8 +105,9 @@ Error DIASession::createFromPdb(StringRef Path,
const wchar_t *Path16Str = reinterpret_cast<const wchar_t*>(Path16.data());
HRESULT HR;
- if (FAILED(HR = DiaDataSource->loadDataFromPdb(Path16Str)))
- return ErrorFromHResult(HR, "Calling loadDataFromPdb");
+ if (FAILED(HR = DiaDataSource->loadDataFromPdb(Path16Str))) {
+ return ErrorFromHResult(HR, "Calling loadDataFromPdb {0}", Path);
+ }
if (FAILED(HR = DiaDataSource->openSession(&DiaSession)))
return ErrorFromHResult(HR, "Calling openSession");
diff --git a/lib/DebugInfo/PDB/Native/ModStream.cpp b/lib/DebugInfo/PDB/Native/ModStream.cpp
index 08798cf0ed28..e87e2c407593 100644
--- a/lib/DebugInfo/PDB/Native/ModStream.cpp
+++ b/lib/DebugInfo/PDB/Native/ModStream.cpp
@@ -82,4 +82,8 @@ ModStream::lines(bool *HadError) const {
return make_range(LineInfo.begin(HadError), LineInfo.end());
}
+bool ModStream::hasLineInfo() const {
+ return C13LinesSubstream.getLength() > 0 || LinesSubstream.getLength() > 0;
+}
+
Error ModStream::commit() { return Error::success(); }
diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
index 3aba35adb53f..70968d4330b0 100644
--- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
+++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -13,6 +13,7 @@
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
#include "llvm/Support/ConvertUTF.h"
@@ -320,7 +321,7 @@ uint32_t NativeRawSymbol::getVirtualTableShapeId() const {
return 0;
}
-std::unique_ptr<PDBSymbolTypeVTable>
+std::unique_ptr<PDBSymbolTypeBuiltin>
NativeRawSymbol::getVirtualBaseTableType() const {
return nullptr;
}
diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp
index 61cef093d4ce..aacefae80c3a 100644
--- a/lib/DebugInfo/PDB/UDTLayout.cpp
+++ b/lib/DebugInfo/PDB/UDTLayout.cpp
@@ -16,6 +16,7 @@
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
@@ -39,36 +40,47 @@ static uint32_t getTypeLength(const PDBSymbol &Symbol) {
return RawType.getLength();
}
-StorageItemBase::StorageItemBase(const UDTLayoutBase &Parent,
- const PDBSymbol &Symbol,
- const std::string &Name,
- uint32_t OffsetInParent, uint32_t Size)
- : Parent(Parent), Symbol(Symbol), Name(Name),
- OffsetInParent(OffsetInParent), SizeOf(Size) {
+LayoutItemBase::LayoutItemBase(const UDTLayoutBase *Parent,
+ const PDBSymbol *Symbol, const std::string &Name,
+ uint32_t OffsetInParent, uint32_t Size,
+ bool IsElided)
+ : Symbol(Symbol), Parent(Parent), Name(Name),
+ OffsetInParent(OffsetInParent), SizeOf(Size), LayoutSize(Size),
+ IsElided(IsElided) {
UsedBytes.resize(SizeOf, true);
}
-uint32_t StorageItemBase::deepPaddingSize() const {
- // sizeof(Field) - sizeof(typeof(Field)) is trailing padding.
- return SizeOf - getTypeLength(Symbol);
+uint32_t LayoutItemBase::deepPaddingSize() const {
+ return UsedBytes.size() - UsedBytes.count();
+}
+
+uint32_t LayoutItemBase::tailPadding() const {
+ int Last = UsedBytes.find_last();
+
+ return UsedBytes.size() - (Last + 1);
}
DataMemberLayoutItem::DataMemberLayoutItem(
- const UDTLayoutBase &Parent, std::unique_ptr<PDBSymbolData> DataMember)
- : StorageItemBase(Parent, *DataMember, DataMember->getName(),
- DataMember->getOffset(), getTypeLength(*DataMember)),
- DataMember(std::move(DataMember)) {
- auto Type = this->DataMember->getType();
+ const UDTLayoutBase &Parent, std::unique_ptr<PDBSymbolData> Member)
+ : LayoutItemBase(&Parent, Member.get(), Member->getName(),
+ Member->getOffset(), getTypeLength(*Member), false),
+ DataMember(std::move(Member)) {
+ auto Type = DataMember->getType();
if (auto UDT = unique_dyn_cast<PDBSymbolTypeUDT>(Type)) {
- // UDT data members might have padding in between fields, but otherwise
- // a member should occupy its entire storage.
- UsedBytes.resize(SizeOf, false);
UdtLayout = llvm::make_unique<ClassLayout>(std::move(UDT));
+ UsedBytes = UdtLayout->usedBytes();
}
}
+VBPtrLayoutItem::VBPtrLayoutItem(const UDTLayoutBase &Parent,
+ std::unique_ptr<PDBSymbolTypeBuiltin> Sym,
+ uint32_t Offset, uint32_t Size)
+ : LayoutItemBase(&Parent, Sym.get(), "<vbptr>", Offset, Size, false),
+ Type(std::move(Sym)) {
+}
+
const PDBSymbolData &DataMemberLayoutItem::getDataMember() {
- return *dyn_cast<PDBSymbolData>(&Symbol);
+ return *dyn_cast<PDBSymbolData>(Symbol);
}
bool DataMemberLayoutItem::hasUDTLayout() const { return UdtLayout != nullptr; }
@@ -77,60 +89,73 @@ const ClassLayout &DataMemberLayoutItem::getUDTLayout() const {
return *UdtLayout;
}
-uint32_t DataMemberLayoutItem::deepPaddingSize() const {
- uint32_t Result = StorageItemBase::deepPaddingSize();
- if (UdtLayout)
- Result += UdtLayout->deepPaddingSize();
- return Result;
-}
-
VTableLayoutItem::VTableLayoutItem(const UDTLayoutBase &Parent,
- std::unique_ptr<PDBSymbolTypeVTable> VTable)
- : StorageItemBase(Parent, *VTable, "<vtbl>", 0, getTypeLength(*VTable)),
- VTable(std::move(VTable)) {
- auto VTableType = cast<PDBSymbolTypePointer>(this->VTable->getType());
+ std::unique_ptr<PDBSymbolTypeVTable> VT)
+ : LayoutItemBase(&Parent, VT.get(), "<vtbl>", 0, getTypeLength(*VT), false),
+ VTable(std::move(VT)) {
+ auto VTableType = cast<PDBSymbolTypePointer>(VTable->getType());
ElementSize = VTableType->getLength();
+}
- Shape =
- unique_dyn_cast<PDBSymbolTypeVTableShape>(VTableType->getPointeeType());
- if (Shape)
- VTableFuncs.resize(Shape->getCount());
+UDTLayoutBase::UDTLayoutBase(const UDTLayoutBase *Parent, const PDBSymbol &Sym,
+ const std::string &Name, uint32_t OffsetInParent,
+ uint32_t Size, bool IsElided)
+ : LayoutItemBase(Parent, &Sym, Name, OffsetInParent, Size, IsElided) {
+ // UDT storage comes from a union of all the children's storage, so start out
+ // uninitialized.
+ UsedBytes.reset(0, Size);
+
+ initializeChildren(Sym);
+ if (LayoutSize < Size)
+ UsedBytes.resize(LayoutSize);
}
-UDTLayoutBase::UDTLayoutBase(const PDBSymbol &Symbol, const std::string &Name,
- uint32_t Size)
- : SymbolBase(Symbol), Name(Name), SizeOf(Size) {
- UsedBytes.resize(Size);
- ChildrenPerByte.resize(Size);
- initializeChildren(Symbol);
+uint32_t UDTLayoutBase::tailPadding() const {
+ uint32_t Abs = LayoutItemBase::tailPadding();
+ if (!LayoutItems.empty()) {
+ const LayoutItemBase *Back = LayoutItems.back();
+ uint32_t ChildPadding = Back->LayoutItemBase::tailPadding();
+ if (Abs < ChildPadding)
+ Abs = 0;
+ else
+ Abs -= ChildPadding;
+ }
+ return Abs;
}
ClassLayout::ClassLayout(const PDBSymbolTypeUDT &UDT)
- : UDTLayoutBase(UDT, UDT.getName(), UDT.getLength()), UDT(UDT) {}
+ : UDTLayoutBase(nullptr, UDT, UDT.getName(), 0, UDT.getLength(), false),
+ UDT(UDT) {
+ ImmediateUsedBytes.resize(SizeOf, false);
+ for (auto &LI : LayoutItems) {
+ uint32_t Begin = LI->getOffsetInParent();
+ uint32_t End = Begin + LI->getLayoutSize();
+ End = std::min(SizeOf, End);
+ ImmediateUsedBytes.set(Begin, End);
+ }
+}
ClassLayout::ClassLayout(std::unique_ptr<PDBSymbolTypeUDT> UDT)
: ClassLayout(*UDT) {
OwnedStorage = std::move(UDT);
}
-BaseClassLayout::BaseClassLayout(const UDTLayoutBase &Parent,
- std::unique_ptr<PDBSymbolTypeBaseClass> Base)
- : UDTLayoutBase(*Base, Base->getName(), Base->getLength()),
- StorageItemBase(Parent, *Base, Base->getName(), Base->getOffset(),
- Base->getLength()),
- Base(std::move(Base)) {
- IsVirtualBase = this->Base->isVirtualBaseClass();
-}
-
-uint32_t UDTLayoutBase::shallowPaddingSize() const {
- return UsedBytes.size() - UsedBytes.count();
+uint32_t ClassLayout::immediatePadding() const {
+ return SizeOf - ImmediateUsedBytes.count();
}
-uint32_t UDTLayoutBase::deepPaddingSize() const {
- uint32_t Result = shallowPaddingSize();
- for (auto &Child : ChildStorage)
- Result += Child->deepPaddingSize();
- return Result;
+BaseClassLayout::BaseClassLayout(const UDTLayoutBase &Parent,
+ uint32_t OffsetInParent, bool Elide,
+ std::unique_ptr<PDBSymbolTypeBaseClass> B)
+ : UDTLayoutBase(&Parent, *B, B->getName(), OffsetInParent, B->getLength(),
+ Elide),
+ Base(std::move(B)) {
+ if (isEmptyBase()) {
+ // Special case an empty base so that it doesn't get treated as padding.
+ UsedBytes.resize(1);
+ UsedBytes.set(0);
+ }
+ IsVirtualBase = Base->isVirtualBaseClass();
}
void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
@@ -138,15 +163,16 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
// followed by functions, followed by other. This ordering is necessary
// so that bases and vtables get initialized before any functions which
// may override them.
-
UniquePtrVector<PDBSymbolTypeBaseClass> Bases;
UniquePtrVector<PDBSymbolTypeVTable> VTables;
UniquePtrVector<PDBSymbolData> Members;
+ UniquePtrVector<PDBSymbolTypeBaseClass> VirtualBaseSyms;
+
auto Children = Sym.findAllChildren();
while (auto Child = Children->getNext()) {
if (auto Base = unique_dyn_cast<PDBSymbolTypeBaseClass>(Child)) {
if (Base->isVirtualBaseClass())
- VirtualBases.push_back(std::move(Base));
+ VirtualBaseSyms.push_back(std::move(Base));
else
Bases.push_back(std::move(Base));
}
@@ -164,20 +190,33 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
Other.push_back(std::move(Child));
}
+ // We don't want to have any re-allocations in the list of bases, so make
+ // sure to reserve enough space so that our ArrayRefs don't get invalidated.
+ AllBases.reserve(Bases.size() + VirtualBaseSyms.size());
+
+ // Only add non-virtual bases to the class first. Only at the end of the
+ // class, after all non-virtual bases and data members have been added do we
+ // add virtual bases. This way the offsets are correctly aligned when we go
+ // to lay out virtual bases.
for (auto &Base : Bases) {
- auto BL = llvm::make_unique<BaseClassLayout>(*this, std::move(Base));
- BaseClasses.push_back(BL.get());
+ uint32_t Offset = Base->getOffset();
+ // Non-virtual bases never get elided.
+ auto BL = llvm::make_unique<BaseClassLayout>(*this, Offset, false,
+ std::move(Base));
+ AllBases.push_back(BL.get());
addChildToLayout(std::move(BL));
}
+ NonVirtualBases = AllBases;
- for (auto &VT : VTables) {
- auto VTLayout = llvm::make_unique<VTableLayoutItem>(*this, std::move(VT));
+ assert(VTables.size() <= 1);
+ if (!VTables.empty()) {
+ auto VTLayout =
+ llvm::make_unique<VTableLayoutItem>(*this, std::move(VTables[0]));
VTable = VTLayout.get();
addChildToLayout(std::move(VTLayout));
- continue;
}
for (auto &Data : Members) {
@@ -186,150 +225,74 @@ void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
addChildToLayout(std::move(DM));
}
- for (auto &Func : Funcs) {
- if (!Func->isVirtual())
- continue;
+ // Make sure to add virtual bases before adding functions, since functions may be
+ // overrides of virtual functions declared in a virtual base, so the VTables
+ // and virtual intros need to be correctly initialized.
+ for (auto &VB : VirtualBaseSyms) {
+ int VBPO = VB->getVirtualBasePointerOffset();
+ if (!hasVBPtrAtOffset(VBPO)) {
+ if (auto VBP = VB->getRawSymbol().getVirtualBaseTableType()) {
+ auto VBPL = llvm::make_unique<VBPtrLayoutItem>(*this, std::move(VBP),
+ VBPO, VBP->getLength());
+ VBPtr = VBPL.get();
+ addChildToLayout(std::move(VBPL));
+ }
+ }
- if (Func->isIntroVirtualFunction())
- addVirtualIntro(*Func);
- else
- addVirtualOverride(*Func);
+ // Virtual bases always go at the end. So just look for the last place we
+ // ended when writing something, and put our virtual base there.
+ // Note that virtual bases get elided unless this is a top-most derived
+ // class.
+ uint32_t Offset = UsedBytes.find_last() + 1;
+ bool Elide = (Parent != nullptr);
+ auto BL =
+ llvm::make_unique<BaseClassLayout>(*this, Offset, Elide, std::move(VB));
+ AllBases.push_back(BL.get());
+
+ // Only lay this virtual base out directly inside of *this* class if this
+ // is a top-most derived class. Keep track of it regardless, but only
+ // physically lay it out if it's a topmost derived class.
+ addChildToLayout(std::move(BL));
}
+ VirtualBases = makeArrayRef(AllBases).drop_front(NonVirtualBases.size());
+
+ if (Parent != nullptr)
+ LayoutSize = UsedBytes.find_last() + 1;
}
-void UDTLayoutBase::addVirtualIntro(PDBSymbolFunc &Func) {
- // Kind of a hack, but we prefer the more common destructor name that people
- // are familiar with, e.g. ~ClassName. It seems there are always both and
- // the vector deleting destructor overwrites the nice destructor, so just
- // ignore the vector deleting destructor.
- if (Func.getName() == "__vecDelDtor")
- return;
-
- if (!VTable) {
- // FIXME: Handle this. What's most likely happening is we have an intro
- // virtual in a derived class where the base also has an intro virtual.
- // In this case the vtable lives in the base. What we really need is
- // for each UDTLayoutBase to contain a list of all its vtables, and
- // then propagate this list up the hierarchy so that derived classes have
- // direct access to their bases' vtables.
- return;
+bool UDTLayoutBase::hasVBPtrAtOffset(uint32_t Off) const {
+ if (VBPtr && VBPtr->getOffsetInParent() == Off)
+ return true;
+ for (BaseClassLayout *BL : AllBases) {
+ if (BL->hasVBPtrAtOffset(Off - BL->getOffsetInParent()))
+ return true;
}
-
- uint32_t Stride = VTable->getElementSize();
-
- uint32_t Index = Func.getVirtualBaseOffset();
- assert(Index % Stride == 0);
- Index /= Stride;
-
- VTable->setFunction(Index, Func);
+ return false;
}
-VTableLayoutItem *UDTLayoutBase::findVTableAtOffset(uint32_t RelativeOffset) {
- if (VTable && VTable->getOffsetInParent() == RelativeOffset)
- return VTable;
- for (auto Base : BaseClasses) {
- uint32_t Begin = Base->getOffsetInParent();
- uint32_t End = Begin + Base->getSize();
- if (RelativeOffset < Begin || RelativeOffset >= End)
- continue;
-
- return Base->findVTableAtOffset(RelativeOffset - Begin);
- }
+void UDTLayoutBase::addChildToLayout(std::unique_ptr<LayoutItemBase> Child) {
+ uint32_t Begin = Child->getOffsetInParent();
- return nullptr;
-}
+ if (!Child->isElided()) {
+ BitVector ChildBytes = Child->usedBytes();
-void UDTLayoutBase::addVirtualOverride(PDBSymbolFunc &Func) {
- auto Signature = Func.getSignature();
- auto ThisAdjust = Signature->getThisAdjust();
- // ThisAdjust tells us which VTable we're looking for. Specifically, it's
- // the offset into the current class of the VTable we're looking for. So
- // look through the base hierarchy until we find one such that
- // AbsoluteOffset(VT) == ThisAdjust
- VTableLayoutItem *VT = findVTableAtOffset(ThisAdjust);
- if (!VT) {
- // FIXME: There really should be a vtable here. If there's not it probably
- // means that the vtable is in a virtual base, which we don't yet support.
- assert(!VirtualBases.empty());
- return;
- }
- int32_t OverrideIndex = -1;
- // Now we've found the VTable. Func will not have a virtual base offset set,
- // so instead we need to compare names and signatures. We iterate each item
- // in the VTable. All items should already have non null entries because they
- // were initialized by the intro virtual, which was guaranteed to come before.
- for (auto ItemAndIndex : enumerate(VT->funcs())) {
- auto Item = ItemAndIndex.value();
- assert(Item);
- // If the name doesn't match, this isn't an override. Note that it's ok
- // for the return type to not match (e.g. co-variant return).
- if (Item->getName() != Func.getName()) {
- if (Item->isDestructor() && Func.isDestructor()) {
- OverrideIndex = ItemAndIndex.index();
- break;
- }
- continue;
- }
- // Now make sure it's the right overload. Get the signature of the existing
- // vtable method and make sure it has the same arglist and the same cv-ness.
- auto ExistingSig = Item->getSignature();
- if (ExistingSig->isConstType() != Signature->isConstType())
- continue;
- if (ExistingSig->isVolatileType() != Signature->isVolatileType())
- continue;
-
- // Now compare arguments. Using the raw bytes of the PDB this would be
- // trivial
- // because there is an ArgListId and they should be identical. But DIA
- // doesn't
- // expose this, so the best we can do is iterate each argument and confirm
- // that
- // each one is identical.
- if (ExistingSig->getCount() != Signature->getCount())
- continue;
- bool IsMatch = true;
- auto ExistingEnumerator = ExistingSig->getArguments();
- auto NewEnumerator = Signature->getArguments();
- for (uint32_t I = 0; I < ExistingEnumerator->getChildCount(); ++I) {
- auto ExistingArg = ExistingEnumerator->getNext();
- auto NewArg = NewEnumerator->getNext();
- if (ExistingArg->getSymIndexId() != NewArg->getSymIndexId()) {
- IsMatch = false;
- break;
- }
- }
- if (!IsMatch)
- continue;
+ // Suppose the child occupies 4 bytes starting at offset 12 in a 32 byte
+ // class. When we call ChildBytes.resize(32), the Child's storage will
+ // still begin at offset 0, so we need to shift it left by offset bytes
+ // to get it into the right position.
+ ChildBytes.resize(UsedBytes.size());
+ ChildBytes <<= Child->getOffsetInParent();
+ UsedBytes |= ChildBytes;
- // It's a match! Stick the new function into the VTable.
- OverrideIndex = ItemAndIndex.index();
- break;
- }
- if (OverrideIndex == -1) {
- // FIXME: This is probably due to one of the other FIXMEs in this file.
- return;
- }
- VT->setFunction(OverrideIndex, Func);
-}
+ if (ChildBytes.count() > 0) {
+ auto Loc = std::upper_bound(LayoutItems.begin(), LayoutItems.end(), Begin,
+ [](uint32_t Off, const LayoutItemBase *Item) {
+ return (Off < Item->getOffsetInParent());
+ });
-void UDTLayoutBase::addChildToLayout(std::unique_ptr<StorageItemBase> Child) {
- uint32_t Begin = Child->getOffsetInParent();
- uint32_t End = Begin + Child->getSize();
- // Due to the empty base optimization, End might point outside the bounds of
- // the parent class. If that happens, just clamp the value.
- End = std::min(End, getClassSize());
-
- UsedBytes.set(Begin, End);
- while (Begin != End) {
- ChildrenPerByte[Begin].push_back(Child.get());
- ++Begin;
+ LayoutItems.insert(Loc, Child.get());
+ }
}
- auto Loc = std::upper_bound(
- ChildStorage.begin(), ChildStorage.end(), Begin,
- [](uint32_t Off, const std::unique_ptr<StorageItemBase> &Item) {
- return Off < Item->getOffsetInParent();
- });
-
- ChildStorage.insert(Loc, std::move(Child));
+ ChildStorage.push_back(std::move(Child));
} \ No newline at end of file
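
The comment in addChildToLayout describes the bit-vector bookkeeping in words; here is a standalone sketch of the same resize/shift/OR step (markChildBytes is a hypothetical name, not part of the patch):

#include "llvm/ADT/BitVector.h"

// A child's used-byte mask starts at bit 0, so it is widened to the parent's
// size and shifted to the child's offset before being OR'd into the parent.
static void markChildBytes(llvm::BitVector &ParentUsed,
                           llvm::BitVector ChildUsed,
                           unsigned OffsetInParent) {
  ChildUsed.resize(ParentUsed.size()); // child mask still begins at bit 0
  ChildUsed <<= OffsetInParent;        // move it to its position in the parent
  ParentUsed |= ChildUsed;             // union into the parent's usage map
}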
diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt
index 59cef04cdece..b886021aee3f 100644
--- a/lib/Fuzzer/CMakeLists.txt
+++ b/lib/Fuzzer/CMakeLists.txt
@@ -1,6 +1,18 @@
-set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}")
-# Disable the coverage and sanitizer instrumentation for the fuzzer itself.
-set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters -Werror")
+include(CheckCXXSourceCompiles)
+
+if( APPLE )
+ CHECK_CXX_SOURCE_COMPILES("
+ static thread_local int blah;
+ int main() {
+ return 0;
+ }
+ " HAS_THREAD_LOCAL)
+
+ if( NOT HAS_THREAD_LOCAL )
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Dthread_local=__thread")
+ endif()
+endif()
+
if( LLVM_USE_SANITIZE_COVERAGE )
if(NOT "${LLVM_USE_SANITIZER}" STREQUAL "Address")
message(FATAL_ERROR
@@ -8,41 +20,50 @@ if( LLVM_USE_SANITIZE_COVERAGE )
"LLVM_USE_SANITIZE_COVERAGE=YES to be set."
)
endif()
+ set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}")
+
+ # Disable the coverage and sanitizer instrumentation for the fuzzer itself.
+ set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters -Werror")
+endif()
+
+# Compile libFuzzer if the compilation is specifically requested, OR
+# if the platform is known to be working.
+if ( LLVM_USE_SANITIZE_COVERAGE OR CMAKE_SYSTEM_NAME MATCHES "Darwin|Linux" )
add_library(LLVMFuzzerNoMainObjects OBJECT
- FuzzerCrossOver.cpp
- FuzzerDriver.cpp
- FuzzerExtFunctionsDlsym.cpp
- FuzzerExtFunctionsDlsymWin.cpp
- FuzzerExtFunctionsWeak.cpp
- FuzzerExtraCounters.cpp
- FuzzerIO.cpp
- FuzzerIOPosix.cpp
- FuzzerIOWindows.cpp
- FuzzerLoop.cpp
- FuzzerMerge.cpp
- FuzzerMutate.cpp
- FuzzerSHA1.cpp
- FuzzerShmemPosix.cpp
- FuzzerShmemWindows.cpp
- FuzzerTracePC.cpp
- FuzzerTraceState.cpp
- FuzzerUtil.cpp
- FuzzerUtilDarwin.cpp
- FuzzerUtilLinux.cpp
- FuzzerUtilPosix.cpp
- FuzzerUtilWindows.cpp
- )
+ FuzzerCrossOver.cpp
+ FuzzerDriver.cpp
+ FuzzerExtFunctionsDlsym.cpp
+ FuzzerExtFunctionsDlsymWin.cpp
+ FuzzerExtFunctionsWeak.cpp
+ FuzzerExtraCounters.cpp
+ FuzzerIO.cpp
+ FuzzerIOPosix.cpp
+ FuzzerIOWindows.cpp
+ FuzzerLoop.cpp
+ FuzzerMerge.cpp
+ FuzzerMutate.cpp
+ FuzzerSHA1.cpp
+ FuzzerShmemPosix.cpp
+ FuzzerShmemWindows.cpp
+ FuzzerTracePC.cpp
+ FuzzerTraceState.cpp
+ FuzzerUtil.cpp
+ FuzzerUtilDarwin.cpp
+ FuzzerUtilLinux.cpp
+ FuzzerUtilPosix.cpp
+ FuzzerUtilWindows.cpp
+ )
add_library(LLVMFuzzerNoMain STATIC
- $<TARGET_OBJECTS:LLVMFuzzerNoMainObjects>
- )
+ $<TARGET_OBJECTS:LLVMFuzzerNoMainObjects>
+ )
target_link_libraries(LLVMFuzzerNoMain ${LLVM_PTHREAD_LIB})
add_library(LLVMFuzzer STATIC
- FuzzerMain.cpp
- $<TARGET_OBJECTS:LLVMFuzzerNoMainObjects>
- )
+ FuzzerMain.cpp
+ $<TARGET_OBJECTS:LLVMFuzzerNoMainObjects>
+ )
target_link_libraries(LLVMFuzzer ${LLVM_PTHREAD_LIB})
+endif()
- if( LLVM_INCLUDE_TESTS )
- add_subdirectory(test)
- endif()
+if( LLVM_USE_SANITIZE_COVERAGE AND LLVM_INCLUDE_TESTS )
+ add_subdirectory(test)
endif()
diff --git a/lib/Fuzzer/FuzzerDefs.h b/lib/Fuzzer/FuzzerDefs.h
index bd1827508002..27f5719236dd 100644
--- a/lib/Fuzzer/FuzzerDefs.h
+++ b/lib/Fuzzer/FuzzerDefs.h
@@ -36,17 +36,29 @@
#error "Support for your platform has not been implemented"
#endif
+#ifndef __has_attribute
+# define __has_attribute(x) 0
+#endif
+
#define LIBFUZZER_POSIX LIBFUZZER_APPLE || LIBFUZZER_LINUX
#ifdef __x86_64
-#define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt")))
+# if __has_attribute(target)
+# define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt")))
+# else
+# define ATTRIBUTE_TARGET_POPCNT
+# endif
#else
-#define ATTRIBUTE_TARGET_POPCNT
+# define ATTRIBUTE_TARGET_POPCNT
#endif
#ifdef __clang__ // avoid gcc warning.
-# define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory")))
+# if __has_attribute(no_sanitize)
+# define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory")))
+# else
+# define ATTRIBUTE_NO_SANITIZE_MEMORY
+# endif
# define ALWAYS_INLINE __attribute__((always_inline))
#else
# define ATTRIBUTE_NO_SANITIZE_MEMORY
diff --git a/lib/Fuzzer/FuzzerMerge.h b/lib/Fuzzer/FuzzerMerge.h
index cf4a0863571d..dd4c37b6e39c 100644
--- a/lib/Fuzzer/FuzzerMerge.h
+++ b/lib/Fuzzer/FuzzerMerge.h
@@ -69,7 +69,7 @@ struct Merger {
size_t Merge(const std::set<uint32_t> &InitialFeatures,
std::vector<std::string> *NewFiles);
size_t Merge(std::vector<std::string> *NewFiles) {
- return Merge({}, NewFiles);
+ return Merge(std::set<uint32_t>{}, NewFiles);
}
size_t ApproximateMemoryConsumption() const;
std::set<uint32_t> AllFeatures() const;
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index d0b77e7218b9..b7de07170de9 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -1103,35 +1103,34 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle() ||
- &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble()) {
+ const APFloat &APF = CFP->getValueAPF();
+ if (&APF.getSemantics() == &APFloat::IEEEsingle() ||
+ &APF.getSemantics() == &APFloat::IEEEdouble()) {
// We would like to output the FP constant value in exponential notation,
// but we cannot do this if doing so will lose precision. Check here to
// make sure that we only output it in exponential format if we can parse
// the value back and get the same value.
//
bool ignored;
- bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble();
- bool isInf = CFP->getValueAPF().isInfinity();
- bool isNaN = CFP->getValueAPF().isNaN();
+ bool isDouble = &APF.getSemantics() == &APFloat::IEEEdouble();
+ bool isInf = APF.isInfinity();
+ bool isNaN = APF.isNaN();
if (!isInf && !isNaN) {
- double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
- CFP->getValueAPF().convertToFloat();
+ double Val = isDouble ? APF.convertToDouble() : APF.convertToFloat();
SmallString<128> StrVal;
- raw_svector_ostream(StrVal) << Val;
-
+ APF.toString(StrVal, 6, 0, false);
// Check to make sure that the stringized number is not some string like
// "Inf" or NaN, that atof will accept, but the lexer will not. Check
// that the string matches the "[-+]?[0-9]" regex.
//
- if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
- ((StrVal[0] == '-' || StrVal[0] == '+') &&
- (StrVal[1] >= '0' && StrVal[1] <= '9'))) {
- // Reparse stringized version!
- if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) {
- Out << StrVal;
- return;
- }
+ assert(((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
+ "[-+]?[0-9] regex does not match!");
+ // Reparse stringized version!
+ if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) {
+ Out << StrVal;
+ return;
}
}
// Otherwise we could not reparse it to exactly the same value, so we must
@@ -1140,7 +1139,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
// x86, so we must not use these types.
static_assert(sizeof(double) == sizeof(uint64_t),
"assuming that double is 64 bits!");
- APFloat apf = CFP->getValueAPF();
+ APFloat apf = APF;
// Floats are represented in ASCII IR as double, convert.
if (!isDouble)
apf.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
@@ -1153,27 +1152,27 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
// These appear as a magic letter identifying the type, then a
// fixed number of hex digits.
Out << "0x";
- APInt API = CFP->getValueAPF().bitcastToAPInt();
- if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended()) {
+ APInt API = APF.bitcastToAPInt();
+ if (&APF.getSemantics() == &APFloat::x87DoubleExtended()) {
Out << 'K';
Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4,
/*Upper=*/true);
Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
/*Upper=*/true);
return;
- } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad()) {
+ } else if (&APF.getSemantics() == &APFloat::IEEEquad()) {
Out << 'L';
Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
/*Upper=*/true);
Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16,
/*Upper=*/true);
- } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble()) {
+ } else if (&APF.getSemantics() == &APFloat::PPCDoubleDouble()) {
Out << 'M';
Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
/*Upper=*/true);
Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16,
/*Upper=*/true);
- } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf()) {
+ } else if (&APF.getSemantics() == &APFloat::IEEEhalf()) {
Out << 'H';
Out << format_hex_no_prefix(API.getZExtValue(), 4,
/*Upper=*/true);
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index 09f037365793..cf2925254695 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -255,17 +255,10 @@ public:
/// \brief Retrieve the attribute set node for the given "slot" in the
/// AttrNode list.
- AttributeSet getSlotNode(unsigned Slot) const {
+ AttributeSet getSlotAttributes(unsigned Slot) const {
return getSlotPair(Slot)->second;
}
- /// \brief Retrieve the attributes for the given "slot" in the AttrNode list.
- /// \p Slot is an index into the AttrNodes list, not the index of the return /
- /// parameter/ function which the attributes apply to.
- AttributeList getSlotAttributes(unsigned Slot) const {
- return AttributeList::get(Context, *getSlotPair(Slot));
- }
-
/// \brief Return true if the AttributeSet or the FunctionIndex has an
/// enum attribute of the given kind.
bool hasFnAttribute(Attribute::AttrKind Kind) const {
@@ -273,8 +266,10 @@ public:
}
typedef AttributeSet::iterator iterator;
- iterator begin(unsigned Slot) const { return getSlotNode(Slot).begin(); }
- iterator end(unsigned Slot) const { return getSlotNode(Slot).end(); }
+ iterator begin(unsigned Slot) const {
+ return getSlotAttributes(Slot).begin();
+ }
+ iterator end(unsigned Slot) const { return getSlotAttributes(Slot).end(); }
void Profile(FoldingSetNodeID &ID) const;
static void Profile(FoldingSetNodeID &ID,
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index d690111ef210..e30414537a6c 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -955,13 +955,13 @@ AttributeList AttributeList::addAttribute(LLVMContext &C,
for (unsigned Index : Indices) {
// Add all attribute slots before the current index.
for (; I < E && getSlotIndex(I) < Index; ++I)
- AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I));
+ AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I));
// Add the attribute at this index. If we already have attributes at this
// index, merge them into a new set.
AttrBuilder B;
if (I < E && getSlotIndex(I) == Index) {
- B.merge(AttrBuilder(pImpl->getSlotNode(I)));
+ B.merge(AttrBuilder(pImpl->getSlotAttributes(I)));
++I;
}
B.addAttribute(A);
@@ -970,7 +970,7 @@ AttributeList AttributeList::addAttribute(LLVMContext &C,
// Add remaining attributes.
for (; I < E; ++I)
- AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I));
+ AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I));
return get(C, AttrVec);
}
@@ -1008,13 +1008,13 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
for (I = 0; I < NumAttrs; ++I) {
if (getSlotIndex(I) >= Index)
break;
- AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I));
+ AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I));
}
AttrBuilder NewAttrs;
if (I < NumAttrs && getSlotIndex(I) == Index) {
// We need to merge the attribute sets.
- NewAttrs.merge(pImpl->getSlotNode(I));
+ NewAttrs.merge(pImpl->getSlotAttributes(I));
++I;
}
NewAttrs.merge(B);
@@ -1024,7 +1024,7 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
// Add the remaining entries.
for (; I < NumAttrs; ++I)
- AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I));
+ AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I));
return get(C, AttrVec);
}
@@ -1063,11 +1063,11 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
if (getSlotIndex(I) >= Index) {
if (getSlotIndex(I) == Index)
- B = AttrBuilder(pImpl->getSlotNode(LastIndex++));
+ B = AttrBuilder(getSlotAttributes(LastIndex++));
break;
}
LastIndex = I + 1;
- AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)});
+ AttrSets.push_back({getSlotIndex(I), getSlotAttributes(I)});
}
// Remove the attributes from the existing set and add them.
@@ -1077,7 +1077,7 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
// Add the remaining attribute slots.
for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
- AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)});
+ AttrSets.push_back({getSlotIndex(I), getSlotAttributes(I)});
return get(C, AttrSets);
}
@@ -1091,7 +1091,7 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C,
for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) {
unsigned Index = getSlotIndex(I);
if (Index != WithoutIndex)
- AttrSet.push_back({Index, pImpl->getSlotNode(I)});
+ AttrSet.push_back({Index, pImpl->getSlotAttributes(I)});
}
return get(C, AttrSet);
}
@@ -1220,7 +1220,7 @@ AttributeSet AttributeList::getAttributes(unsigned Index) const {
// Loop through to find the attribute node we want.
for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I)
if (pImpl->getSlotIndex(I) == Index)
- return pImpl->getSlotNode(I);
+ return pImpl->getSlotAttributes(I);
return AttributeSet();
}
@@ -1251,7 +1251,7 @@ unsigned AttributeList::getSlotIndex(unsigned Slot) const {
return pImpl->getSlotIndex(Slot);
}
-AttributeList AttributeList::getSlotAttributes(unsigned Slot) const {
+AttributeSet AttributeList::getSlotAttributes(unsigned Slot) const {
assert(pImpl && Slot < pImpl->getNumSlots() &&
"Slot # out of range!");
return pImpl->getSlotAttributes(Slot);
diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp
index ba92a91cc917..d4b455228225 100644
--- a/lib/IR/GCOV.cpp
+++ b/lib/IR/GCOV.cpp
@@ -589,8 +589,12 @@ FileInfo::openCoveragePath(StringRef CoveragePath) {
/// print - Print source files with collected line count information.
void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename,
StringRef GCNOFile, StringRef GCDAFile) {
- for (const auto &LI : LineInfo) {
- StringRef Filename = LI.first();
+ SmallVector<StringRef, 4> Filenames;
+ for (const auto &LI : LineInfo)
+ Filenames.push_back(LI.first());
+ std::sort(Filenames.begin(), Filenames.end());
+
+ for (StringRef Filename : Filenames) {
auto AllLines = LineConsumer(Filename);
std::string CoveragePath = getCoveragePath(Filename, MainFilename);
@@ -603,7 +607,7 @@ void FileInfo::print(raw_ostream &InfoOS, StringRef MainFilename,
CovOS << " -: 0:Runs:" << RunCount << "\n";
CovOS << " -: 0:Programs:" << ProgramCount << "\n";
- const LineData &Line = LI.second;
+ const LineData &Line = LineInfo[Filename];
GCOVCoverage FileCoverage(Filename);
for (uint32_t LineIndex = 0; LineIndex < Line.LastLine || !AllLines.empty();
++LineIndex) {
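The GCOV change above first collects the LineInfo keys into a SmallVector and sorts them, so per-file coverage output is produced in a stable, filename-sorted order rather than StringMap iteration order. The same pattern in a self-contained form; the names below are illustrative, not from the patch:

  #include <algorithm>
  #include <cstdio>
  #include <string>
  #include <unordered_map>
  #include <vector>

  // Emit map contents in a deterministic order by sorting the keys first.
  void printSorted(const std::unordered_map<std::string, int> &Counts) {
    std::vector<std::string> Keys;
    Keys.reserve(Counts.size());
    for (const auto &KV : Counts)
      Keys.push_back(KV.first);
    std::sort(Keys.begin(), Keys.end());
    for (const std::string &K : Keys)
      std::printf("%s: %d\n", K.c_str(), Counts.at(K));
  }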
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index b07c57685a26..d83bdf2acd43 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -432,6 +432,7 @@ namespace {
enum PointerStripKind {
PSK_ZeroIndices,
PSK_ZeroIndicesAndAliases,
+ PSK_ZeroIndicesAndAliasesAndBarriers,
PSK_InBoundsConstantIndices,
PSK_InBounds
};
@@ -450,6 +451,7 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
if (auto *GEP = dyn_cast<GEPOperator>(V)) {
switch (StripKind) {
case PSK_ZeroIndicesAndAliases:
+ case PSK_ZeroIndicesAndAliasesAndBarriers:
case PSK_ZeroIndices:
if (!GEP->hasAllZeroIndices())
return V;
@@ -472,12 +474,20 @@ static const Value *stripPointerCastsAndOffsets(const Value *V) {
return V;
V = GA->getAliasee();
} else {
- if (auto CS = ImmutableCallSite(V))
+ if (auto CS = ImmutableCallSite(V)) {
if (const Value *RV = CS.getReturnedArgOperand()) {
V = RV;
continue;
}
-
+      // The result of invariant.group.barrier must alias its argument,
+      // but it can't be marked with the returned attribute, so it needs
+      // special-casing here.
+ if (StripKind == PSK_ZeroIndicesAndAliasesAndBarriers &&
+ CS.getIntrinsicID() == Intrinsic::invariant_group_barrier) {
+ V = CS.getArgOperand(0);
+ continue;
+ }
+ }
return V;
}
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
@@ -499,6 +509,11 @@ const Value *Value::stripInBoundsConstantOffsets() const {
return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this);
}
+const Value *Value::stripPointerCastsAndBarriers() const {
+ return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliasesAndBarriers>(
+ this);
+}
+
const Value *
Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
APInt &Offset) const {
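The Value.cpp hunks add a strip kind that also looks through llvm.invariant.group.barrier calls, whose result must alias the argument but cannot carry the returned attribute, and expose it as Value::stripPointerCastsAndBarriers(). A small usage sketch under that assumption; the wrapper name below is illustrative:

  #include "llvm/IR/Value.h"
  using namespace llvm;

  // Walk through zero-index GEPs, bitcasts, aliases and
  // invariant.group barriers back to the underlying pointer.
  static const Value *underlyingPtrIgnoringBarriers(const Value *P) {
    return P->stripPointerCastsAndBarriers();
  }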
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 9782c898bf50..1bc0d7361d4c 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -415,7 +415,8 @@ void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym,
// Flag as visible outside of ThinLTO if visible from a regular object or
// if this is a reference in the regular LTO partition.
GlobalRes.VisibleOutsideThinLTO |=
- (Res.VisibleToRegularObj || (Partition == GlobalResolution::RegularLTO));
+ (Res.VisibleToRegularObj || Sym.isUsed() ||
+ Partition == GlobalResolution::RegularLTO);
}
static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp
index 4bd251f727a4..30447c528af1 100644
--- a/lib/LTO/LTOBackend.cpp
+++ b/lib/LTO/LTOBackend.cpp
@@ -25,7 +25,6 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/LTO/LTO.h"
-#include "llvm/LTO/legacy/UpdateCompilerUsed.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Passes/PassBuilder.h"
@@ -353,19 +352,6 @@ finalizeOptimizationRemarks(std::unique_ptr<tool_output_file> DiagOutputFile) {
DiagOutputFile->os().flush();
}
-static void handleAsmUndefinedRefs(Module &Mod, TargetMachine &TM) {
- // Collect the list of undefined symbols used in asm and update
- // llvm.compiler.used to prevent optimization to drop these from the output.
- StringSet<> AsmUndefinedRefs;
- ModuleSymbolTable::CollectAsmSymbols(
- Mod,
- [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
- if (Flags & object::BasicSymbolRef::SF_Undefined)
- AsmUndefinedRefs.insert(Name);
- });
- updateCompilerUsed(Mod, TM, AsmUndefinedRefs);
-}
-
Error lto::backend(Config &C, AddStreamFn AddStream,
unsigned ParallelCodeGenParallelismLevel,
std::unique_ptr<Module> Mod,
@@ -377,8 +363,6 @@ Error lto::backend(Config &C, AddStreamFn AddStream,
std::unique_ptr<TargetMachine> TM =
createTargetMachine(C, Mod->getTargetTriple(), *TOrErr);
- handleAsmUndefinedRefs(*Mod, *TM);
-
// Setup optimization remarks.
auto DiagFileOrErr = lto::setupOptimizationRemarks(
Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness);
@@ -416,8 +400,6 @@ Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
std::unique_ptr<TargetMachine> TM =
createTargetMachine(Conf, Mod.getTargetTriple(), *TOrErr);
- handleAsmUndefinedRefs(Mod, *TM);
-
if (Conf.CodeGenOnly) {
codegen(Conf, TM.get(), AddStream, Task, Mod);
return Error::success();
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 42e8ad340281..2fa9c03b608e 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -134,7 +134,7 @@ struct ParseStatementInfo {
SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
- ParseStatementInfo() = default;
+ ParseStatementInfo() = delete;
ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
: AsmRewrites(rewrites) {}
};
@@ -737,6 +737,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
HadError = false;
AsmCond StartingCondState = TheCondState;
+ SmallVector<AsmRewrite, 4> AsmStrRewrites;
// If we are generating dwarf for assembly source files save the initial text
// section and generate a .file directive.
@@ -756,7 +757,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof)) {
- ParseStatementInfo Info;
+ ParseStatementInfo Info(&AsmStrRewrites);
if (!parseStatement(Info, nullptr))
continue;
@@ -1650,7 +1651,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
}
// Emit the label.
- if (!ParsingInlineAsm)
+ if (!getTargetParser().isParsingInlineAsm())
Out.EmitLabel(Sym, IDLoc);
// If we are generating dwarf for assembly source files then gather the
@@ -2057,9 +2058,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
// If parsing succeeded, match the instruction.
if (!ParseHadError) {
uint64_t ErrorInfo;
- if (getTargetParser().MatchAndEmitInstruction(IDLoc, Info.Opcode,
- Info.ParsedOperands, Out,
- ErrorInfo, ParsingInlineAsm))
+ if (getTargetParser().MatchAndEmitInstruction(
+ IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo,
+ getTargetParser().isParsingInlineAsm()))
return true;
}
return false;
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 159cc3b4def2..6444046a30d7 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -1105,7 +1105,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
encodeULEB128(wasm::WASM_SEC_CODE, getStream());
- encodeULEB128(CodeRelocations.size(), getStream());
+ encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream());
WriteRelocations(CodeRelocations, getStream(), SymbolIndices);
WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream());
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
index 23682e1fabfd..e89a4a315c46 100644
--- a/lib/Object/ELF.cpp
+++ b/lib/Object/ELF.cpp
@@ -1,4 +1,4 @@
-//===- ELF.cpp - ELF object file implementation -----------------*- C++ -*-===//
+//===- ELF.cpp - ELF object file implementation ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,15 +8,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/ELF.h"
+#include "llvm/Support/ELF.h"
-namespace llvm {
-namespace object {
+using namespace llvm;
+using namespace object;
#define ELF_RELOC(name, value) \
case ELF::name: \
return #name; \
-StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
+StringRef llvm::object::getELFRelocationTypeName(uint32_t Machine,
+ uint32_t Type) {
switch (Machine) {
case ELF::EM_X86_64:
switch (Type) {
@@ -139,6 +141,3 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
}
#undef ELF_RELOC
-
-} // end namespace object
-} // end namespace llvm
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index 3f8c81c8e911..86f033bb6cbf 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -1,4 +1,4 @@
-//===- ELFObjectFile.cpp - ELF object file implementation -------*- C++ -*-===//
+//===- ELFObjectFile.cpp - ELF object file implementation -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,12 +11,27 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ELFTypes.h"
+#include "llvm/Object/Error.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMAttributeParser.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
-namespace llvm {
+using namespace llvm;
using namespace object;
ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source)
@@ -299,5 +314,3 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
TheTriple.setArchName(Triple);
}
-
-} // end namespace llvm
diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp
index bb3d1b2cf695..5f0837882d60 100644
--- a/lib/Object/IRSymtab.cpp
+++ b/lib/Object/IRSymtab.cpp
@@ -1,4 +1,4 @@
-//===- IRSymtab.cpp - implementation of IR symbol tables --------*- C++ -*-===//
+//===- IRSymtab.cpp - implementation of IR symbol tables ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,14 +7,34 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Object/IRSymtab.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/ObjectUtils.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Object/IRSymtab.h"
#include "llvm/Object/ModuleSymbolTable.h"
+#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/StringSaver.h"
+#include <cassert>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace irsymtab;
@@ -25,6 +45,7 @@ namespace {
struct Builder {
SmallVector<char, 0> &Symtab;
SmallVector<char, 0> &Strtab;
+
Builder(SmallVector<char, 0> &Symtab, SmallVector<char, 0> &Strtab)
: Symtab(Symtab), Strtab(Strtab) {}
@@ -49,6 +70,7 @@ struct Builder {
S.Offset = StrtabBuilder.add(Value);
S.Size = Value.size();
}
+
template <typename T>
void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) {
R.Offset = Symtab.size();
@@ -141,6 +163,9 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
Sym.ComdatIndex = -1;
auto *GV = Msym.dyn_cast<GlobalValue *>();
if (!GV) {
+ // Undefined module asm symbols act as GC roots and are implicitly used.
+ if (Flags & object::BasicSymbolRef::SF_Undefined)
+ Sym.Flags |= 1 << storage::Symbol::FB_used;
setStr(Sym.IRName, "");
return Error::success();
}
@@ -228,7 +253,7 @@ Error Builder::build(ArrayRef<Module *> IRMods) {
return Error::success();
}
-} // anonymous namespace
+} // end anonymous namespace
Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
SmallVector<char, 0> &Strtab) {
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 1753d2baaedd..3d3fa07db3f4 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -1,4 +1,4 @@
-//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===//
+//===- MachOObjectFile.cpp - Mach-O object file binding -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,32 +12,52 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Object/MachO.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MachO.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
-#include <cctype>
+#include "llvm/Support/SwapByteOrder.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
#include <cstring>
#include <limits>
#include <list>
+#include <memory>
+#include <string>
+#include <system_error>
using namespace llvm;
using namespace object;
namespace {
+
struct section_base {
char sectname[16];
char segname[16];
};
-}
+
+} // end anonymous namespace
static Error
malformedError(Twine Msg) {
@@ -1144,11 +1164,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
bool Is64bits, Error &Err,
uint32_t UniversalCputype,
uint32_t UniversalIndex)
- : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
- SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr),
- DataInCodeLoadCmd(nullptr), LinkOptHintsLoadCmd(nullptr),
- DyldInfoLoadCmd(nullptr), UuidLoadCmd(nullptr),
- HasPageZeroSegment(false) {
+ : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object) {
ErrorAsOutParameter ErrAsOutParam(&Err);
uint64_t SizeOfHeaders;
uint32_t cputype;
@@ -2343,11 +2359,11 @@ StringRef MachOObjectFile::getFileFormatName() const {
unsigned CPUType = getCPUType(*this);
if (!is64Bit()) {
switch (CPUType) {
- case llvm::MachO::CPU_TYPE_I386:
+ case MachO::CPU_TYPE_I386:
return "Mach-O 32-bit i386";
- case llvm::MachO::CPU_TYPE_ARM:
+ case MachO::CPU_TYPE_ARM:
return "Mach-O arm";
- case llvm::MachO::CPU_TYPE_POWERPC:
+ case MachO::CPU_TYPE_POWERPC:
return "Mach-O 32-bit ppc";
default:
return "Mach-O 32-bit unknown";
@@ -2355,11 +2371,11 @@ StringRef MachOObjectFile::getFileFormatName() const {
}
switch (CPUType) {
- case llvm::MachO::CPU_TYPE_X86_64:
+ case MachO::CPU_TYPE_X86_64:
return "Mach-O 64-bit x86-64";
- case llvm::MachO::CPU_TYPE_ARM64:
+ case MachO::CPU_TYPE_ARM64:
return "Mach-O arm64";
- case llvm::MachO::CPU_TYPE_POWERPC64:
+ case MachO::CPU_TYPE_POWERPC64:
return "Mach-O 64-bit ppc64";
default:
return "Mach-O 64-bit unknown";
@@ -2368,17 +2384,17 @@ StringRef MachOObjectFile::getFileFormatName() const {
Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) {
switch (CPUType) {
- case llvm::MachO::CPU_TYPE_I386:
+ case MachO::CPU_TYPE_I386:
return Triple::x86;
- case llvm::MachO::CPU_TYPE_X86_64:
+ case MachO::CPU_TYPE_X86_64:
return Triple::x86_64;
- case llvm::MachO::CPU_TYPE_ARM:
+ case MachO::CPU_TYPE_ARM:
return Triple::arm;
- case llvm::MachO::CPU_TYPE_ARM64:
+ case MachO::CPU_TYPE_ARM64:
return Triple::aarch64;
- case llvm::MachO::CPU_TYPE_POWERPC:
+ case MachO::CPU_TYPE_POWERPC:
return Triple::ppc;
- case llvm::MachO::CPU_TYPE_POWERPC64:
+ case MachO::CPU_TYPE_POWERPC64:
return Triple::ppc64;
default:
return Triple::UnknownArch;
@@ -2571,8 +2587,7 @@ dice_iterator MachOObjectFile::end_dices() const {
return dice_iterator(DiceRef(DRI, this));
}
-ExportEntry::ExportEntry(ArrayRef<uint8_t> T)
- : Trie(T), Malformed(false), Done(false) {}
+ExportEntry::ExportEntry(ArrayRef<uint8_t> T) : Trie(T) {}
void ExportEntry::moveToFirst() {
pushNode(0);
@@ -2641,9 +2656,7 @@ uint32_t ExportEntry::nodeOffset() const {
}
ExportEntry::NodeState::NodeState(const uint8_t *Ptr)
- : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0),
- ImportName(nullptr), ChildCount(0), NextChildIndex(0),
- ParentStringLength(0), IsExportNode(false) {}
+ : Start(Ptr), Current(Ptr) {}
void ExportEntry::pushNode(uint64_t offset) {
const uint8_t *Ptr = Trie.begin() + offset;
@@ -2733,7 +2746,7 @@ void ExportEntry::moveNext() {
iterator_range<export_iterator>
MachOObjectFile::exports(ArrayRef<uint8_t> Trie) {
ExportEntry Start(Trie);
- if (Trie.size() == 0)
+ if (Trie.empty())
Start.moveToEnd();
else
Start.moveToFirst();
@@ -2750,9 +2763,8 @@ iterator_range<export_iterator> MachOObjectFile::exports() const {
MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O,
ArrayRef<uint8_t> Bytes, bool is64Bit)
- : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0),
- SegmentIndex(-1), RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0),
- PointerSize(is64Bit ? 8 : 4), Done(false) {}
+ : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()),
+ PointerSize(is64Bit ? 8 : 4) {}
void MachORebaseEntry::moveToFirst() {
Ptr = Opcodes.begin();
@@ -2794,7 +2806,7 @@ void MachORebaseEntry::moveNext() {
More = false;
Done = true;
moveToEnd();
- DEBUG_WITH_TYPE("mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DONE\n");
+ DEBUG_WITH_TYPE("mach-o-rebase", dbgs() << "REBASE_OPCODE_DONE\n");
break;
case MachO::REBASE_OPCODE_SET_TYPE_IMM:
RebaseType = ImmValue;
@@ -2807,8 +2819,8 @@ void MachORebaseEntry::moveNext() {
}
DEBUG_WITH_TYPE(
"mach-o-rebase",
- llvm::dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: "
- << "RebaseType=" << (int) RebaseType << "\n");
+ dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: "
+ << "RebaseType=" << (int) RebaseType << "\n");
break;
case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
SegmentIndex = ImmValue;
@@ -2831,10 +2843,10 @@ void MachORebaseEntry::moveNext() {
}
DEBUG_WITH_TYPE(
"mach-o-rebase",
- llvm::dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: "
- << "SegmentIndex=" << SegmentIndex << ", "
- << format("SegmentOffset=0x%06X", SegmentOffset)
- << "\n");
+ dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: "
+ << "SegmentIndex=" << SegmentIndex << ", "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << "\n");
break;
case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
SegmentOffset += readULEB128(&error);
@@ -2855,9 +2867,9 @@ void MachORebaseEntry::moveNext() {
return;
}
DEBUG_WITH_TYPE("mach-o-rebase",
- llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: "
- << format("SegmentOffset=0x%06X",
- SegmentOffset) << "\n");
+ dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: "
+ << format("SegmentOffset=0x%06X",
+ SegmentOffset) << "\n");
break;
case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
@@ -2881,9 +2893,9 @@ void MachORebaseEntry::moveNext() {
return;
}
DEBUG_WITH_TYPE("mach-o-rebase",
- llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: "
- << format("SegmentOffset=0x%06X",
- SegmentOffset) << "\n");
+ dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: "
+ << format("SegmentOffset=0x%06X",
+ SegmentOffset) << "\n");
break;
case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES:
error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
@@ -2913,11 +2925,11 @@ void MachORebaseEntry::moveNext() {
}
DEBUG_WITH_TYPE(
"mach-o-rebase",
- llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: "
- << format("SegmentOffset=0x%06X", SegmentOffset)
- << ", AdvanceAmount=" << AdvanceAmount
- << ", RemainingLoopCount=" << RemainingLoopCount
- << "\n");
+ dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << ", AdvanceAmount=" << AdvanceAmount
+ << ", RemainingLoopCount=" << RemainingLoopCount
+ << "\n");
return;
case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
@@ -2954,11 +2966,11 @@ void MachORebaseEntry::moveNext() {
}
DEBUG_WITH_TYPE(
"mach-o-rebase",
- llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: "
- << format("SegmentOffset=0x%06X", SegmentOffset)
- << ", AdvanceAmount=" << AdvanceAmount
- << ", RemainingLoopCount=" << RemainingLoopCount
- << "\n");
+ dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << ", AdvanceAmount=" << AdvanceAmount
+ << ", RemainingLoopCount=" << RemainingLoopCount
+ << "\n");
return;
case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
@@ -2992,11 +3004,11 @@ void MachORebaseEntry::moveNext() {
}
DEBUG_WITH_TYPE(
"mach-o-rebase",
- llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: "
- << format("SegmentOffset=0x%06X", SegmentOffset)
- << ", AdvanceAmount=" << AdvanceAmount
- << ", RemainingLoopCount=" << RemainingLoopCount
- << "\n");
+ dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << ", AdvanceAmount=" << AdvanceAmount
+ << ", RemainingLoopCount=" << RemainingLoopCount
+ << "\n");
return;
case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
@@ -3041,11 +3053,11 @@ void MachORebaseEntry::moveNext() {
}
DEBUG_WITH_TYPE(
"mach-o-rebase",
- llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: "
- << format("SegmentOffset=0x%06X", SegmentOffset)
- << ", AdvanceAmount=" << AdvanceAmount
- << ", RemainingLoopCount=" << RemainingLoopCount
- << "\n");
+ dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << ", AdvanceAmount=" << AdvanceAmount
+ << ", RemainingLoopCount=" << RemainingLoopCount
+ << "\n");
return;
default:
*E = malformedError("bad rebase info (bad opcode value 0x" +
@@ -3131,10 +3143,8 @@ iterator_range<rebase_iterator> MachOObjectFile::rebaseTable(Error &Err) {
MachOBindEntry::MachOBindEntry(Error *E, const MachOObjectFile *O,
ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK)
- : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0),
- SegmentIndex(-1), LibraryOrdinalSet(false), Ordinal(0), Flags(0),
- Addend(0), RemainingLoopCount(0), AdvanceAmount(0), BindType(0),
- PointerSize(is64Bit ? 8 : 4), TableKind(BK), Done(false) {}
+ : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()),
+ PointerSize(is64Bit ? 8 : 4), TableKind(BK) {}
void MachOBindEntry::moveToFirst() {
Ptr = Opcodes.begin();
@@ -3189,7 +3199,7 @@ void MachOBindEntry::moveNext() {
}
More = false;
moveToEnd();
- DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DONE\n");
+ DEBUG_WITH_TYPE("mach-o-bind", dbgs() << "BIND_OPCODE_DONE\n");
break;
case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
if (TableKind == Kind::Weak) {
@@ -3211,8 +3221,8 @@ void MachOBindEntry::moveNext() {
}
DEBUG_WITH_TYPE(
"mach-o-bind",
- llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: "
- << "Ordinal=" << Ordinal << "\n");
+ dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: "
+ << "Ordinal=" << Ordinal << "\n");
break;
case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
if (TableKind == Kind::Weak) {
@@ -3241,8 +3251,8 @@ void MachOBindEntry::moveNext() {
}
DEBUG_WITH_TYPE(
"mach-o-bind",
- llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: "
- << "Ordinal=" << Ordinal << "\n");
+ dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: "
+ << "Ordinal=" << Ordinal << "\n");
break;
case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
if (TableKind == Kind::Weak) {
@@ -3267,8 +3277,8 @@ void MachOBindEntry::moveNext() {
Ordinal = 0;
DEBUG_WITH_TYPE(
"mach-o-bind",
- llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: "
- << "Ordinal=" << Ordinal << "\n");
+ dbgs() << "BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: "
+ << "Ordinal=" << Ordinal << "\n");
break;
case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
Flags = ImmValue;
@@ -3288,8 +3298,8 @@ void MachOBindEntry::moveNext() {
++Ptr;
DEBUG_WITH_TYPE(
"mach-o-bind",
- llvm::dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: "
- << "SymbolName=" << SymbolName << "\n");
+ dbgs() << "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: "
+ << "SymbolName=" << SymbolName << "\n");
if (TableKind == Kind::Weak) {
if (ImmValue & MachO::BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION)
return;
@@ -3306,8 +3316,8 @@ void MachOBindEntry::moveNext() {
}
DEBUG_WITH_TYPE(
"mach-o-bind",
- llvm::dbgs() << "BIND_OPCODE_SET_TYPE_IMM: "
- << "BindType=" << (int)BindType << "\n");
+ dbgs() << "BIND_OPCODE_SET_TYPE_IMM: "
+ << "BindType=" << (int)BindType << "\n");
break;
case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
Addend = readSLEB128(&error);
@@ -3320,8 +3330,8 @@ void MachOBindEntry::moveNext() {
}
DEBUG_WITH_TYPE(
"mach-o-bind",
- llvm::dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: "
- << "Addend=" << Addend << "\n");
+ dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: "
+ << "Addend=" << Addend << "\n");
break;
case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
SegmentIndex = ImmValue;
@@ -3343,10 +3353,10 @@ void MachOBindEntry::moveNext() {
}
DEBUG_WITH_TYPE(
"mach-o-bind",
- llvm::dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: "
- << "SegmentIndex=" << SegmentIndex << ", "
- << format("SegmentOffset=0x%06X", SegmentOffset)
- << "\n");
+ dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: "
+ << "SegmentIndex=" << SegmentIndex << ", "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << "\n");
break;
case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
SegmentOffset += readULEB128(&error);
@@ -3366,9 +3376,9 @@ void MachOBindEntry::moveNext() {
return;
}
DEBUG_WITH_TYPE("mach-o-bind",
- llvm::dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: "
- << format("SegmentOffset=0x%06X",
- SegmentOffset) << "\n");
+ dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: "
+ << format("SegmentOffset=0x%06X",
+ SegmentOffset) << "\n");
break;
case MachO::BIND_OPCODE_DO_BIND:
AdvanceAmount = PointerSize;
@@ -3395,9 +3405,9 @@ void MachOBindEntry::moveNext() {
return;
}
DEBUG_WITH_TYPE("mach-o-bind",
- llvm::dbgs() << "BIND_OPCODE_DO_BIND: "
- << format("SegmentOffset=0x%06X",
- SegmentOffset) << "\n");
+ dbgs() << "BIND_OPCODE_DO_BIND: "
+ << format("SegmentOffset=0x%06X",
+ SegmentOffset) << "\n");
return;
case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
if (TableKind == Kind::Lazy) {
@@ -3452,11 +3462,11 @@ void MachOBindEntry::moveNext() {
RemainingLoopCount = 0;
DEBUG_WITH_TYPE(
"mach-o-bind",
- llvm::dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: "
- << format("SegmentOffset=0x%06X", SegmentOffset)
- << ", AdvanceAmount=" << AdvanceAmount
- << ", RemainingLoopCount=" << RemainingLoopCount
- << "\n");
+ dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << ", AdvanceAmount=" << AdvanceAmount
+ << ", RemainingLoopCount=" << RemainingLoopCount
+ << "\n");
return;
case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
if (TableKind == Kind::Lazy) {
@@ -3501,10 +3511,9 @@ void MachOBindEntry::moveNext() {
return;
}
DEBUG_WITH_TYPE("mach-o-bind",
- llvm::dbgs()
+ dbgs()
<< "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: "
- << format("SegmentOffset=0x%06X",
- SegmentOffset) << "\n");
+ << format("SegmentOffset=0x%06X", SegmentOffset) << "\n");
return;
case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
if (TableKind == Kind::Lazy) {
@@ -3568,11 +3577,11 @@ void MachOBindEntry::moveNext() {
}
DEBUG_WITH_TYPE(
"mach-o-bind",
- llvm::dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: "
- << format("SegmentOffset=0x%06X", SegmentOffset)
- << ", AdvanceAmount=" << AdvanceAmount
- << ", RemainingLoopCount=" << RemainingLoopCount
- << "\n");
+ dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: "
+ << format("SegmentOffset=0x%06X", SegmentOffset)
+ << ", AdvanceAmount=" << AdvanceAmount
+ << ", RemainingLoopCount=" << RemainingLoopCount
+ << "\n");
return;
default:
*E = malformedError("bad bind info (bad opcode value 0x" +
diff --git a/lib/Object/ModuleSummaryIndexObjectFile.cpp b/lib/Object/ModuleSummaryIndexObjectFile.cpp
index de1ddab88fd4..91f93a41032e 100644
--- a/lib/Object/ModuleSummaryIndexObjectFile.cpp
+++ b/lib/Object/ModuleSummaryIndexObjectFile.cpp
@@ -1,4 +1,4 @@
-//===- ModuleSummaryIndexObjectFile.cpp - Summary index file implementation ==//
+//==- ModuleSummaryIndexObjectFile.cpp - Summary index file implementation -==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,29 +11,38 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/ModuleSummaryIndex.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <memory>
+#include <system_error>
+
using namespace llvm;
using namespace object;
-static llvm::cl::opt<bool> IgnoreEmptyThinLTOIndexFile(
- "ignore-empty-index-file", llvm::cl::ZeroOrMore,
- llvm::cl::desc(
+static cl::opt<bool> IgnoreEmptyThinLTOIndexFile(
+ "ignore-empty-index-file", cl::ZeroOrMore,
+ cl::desc(
"Ignore an empty index file and perform non-ThinLTO compilation"),
- llvm::cl::init(false));
+ cl::init(false));
ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile(
MemoryBufferRef Object, std::unique_ptr<ModuleSummaryIndex> I)
: SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) {
}
-ModuleSummaryIndexObjectFile::~ModuleSummaryIndexObjectFile() {}
+ModuleSummaryIndexObjectFile::~ModuleSummaryIndexObjectFile() = default;
std::unique_ptr<ModuleSummaryIndex> ModuleSummaryIndexObjectFile::takeIndex() {
return std::move(Index);
diff --git a/lib/Object/ModuleSymbolTable.cpp b/lib/Object/ModuleSymbolTable.cpp
index 9a935d8e0869..a5b42725d817 100644
--- a/lib/Object/ModuleSymbolTable.cpp
+++ b/lib/Object/ModuleSymbolTable.cpp
@@ -1,4 +1,4 @@
-//===- ModuleSymbolTable.cpp - symbol table for in-memory IR ----*- C++ -*-===//
+//===- ModuleSymbolTable.cpp - symbol table for in-memory IR --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,27 +13,45 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Object/IRObjectFile.h"
#include "RecordStreamer.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/IR/GVMaterializer.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Object/ObjectFile.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Object/ModuleSymbolTable.h"
+#include "llvm/Object/SymbolicFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <string>
+
using namespace llvm;
using namespace object;
diff --git a/lib/Object/RecordStreamer.cpp b/lib/Object/RecordStreamer.cpp
index c9c27451f809..e94e9cfed394 100644
--- a/lib/Object/RecordStreamer.cpp
+++ b/lib/Object/RecordStreamer.cpp
@@ -9,6 +9,7 @@
#include "RecordStreamer.h"
#include "llvm/MC/MCSymbol.h"
+
using namespace llvm;
void RecordStreamer::markDefined(const MCSymbol &Symbol) {
@@ -69,14 +70,14 @@ void RecordStreamer::markUsed(const MCSymbol &Symbol) {
void RecordStreamer::visitUsedSymbol(const MCSymbol &Sym) { markUsed(Sym); }
+RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {}
+
RecordStreamer::const_iterator RecordStreamer::begin() {
return Symbols.begin();
}
RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); }
-RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {}
-
void RecordStreamer::EmitInstruction(const MCInst &Inst,
const MCSubtargetInfo &STI, bool) {
MCStreamer::EmitInstruction(Inst, STI);
diff --git a/lib/Object/RecordStreamer.h b/lib/Object/RecordStreamer.h
index a845ecd786a8..4d119091a3d2 100644
--- a/lib/Object/RecordStreamer.h
+++ b/lib/Object/RecordStreamer.h
@@ -1,4 +1,4 @@
-//===-- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*===//
+//===- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,9 +10,16 @@
#ifndef LLVM_LIB_OBJECT_RECORDSTREAMER_H
#define LLVM_LIB_OBJECT_RECORDSTREAMER_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/SMLoc.h"
+#include <vector>
namespace llvm {
+
class RecordStreamer : public MCStreamer {
public:
enum State { NeverSeen, Global, Defined, DefinedGlobal, DefinedWeak, Used,
@@ -24,16 +31,19 @@ private:
// their symbol binding after parsing complete. This maps from each
// aliasee to its list of aliases.
DenseMap<const MCSymbol *, std::vector<MCSymbol *>> SymverAliasMap;
+
void markDefined(const MCSymbol &Symbol);
void markGlobal(const MCSymbol &Symbol, MCSymbolAttr Attribute);
void markUsed(const MCSymbol &Symbol);
void visitUsedSymbol(const MCSymbol &Sym) override;
public:
- typedef StringMap<State>::const_iterator const_iterator;
+ RecordStreamer(MCContext &Context);
+
+ using const_iterator = StringMap<State>::const_iterator;
+
const_iterator begin();
const_iterator end();
- RecordStreamer(MCContext &Context);
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
bool) override;
void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
@@ -50,6 +60,7 @@ public:
DenseMap<const MCSymbol *, std::vector<MCSymbol *>> &symverAliases() {
return SymverAliasMap;
}
+
/// Get the state recorded for the given symbol.
State getSymbolState(const MCSymbol *Sym) {
auto SI = Symbols.find(Sym->getName());
@@ -58,5 +69,7 @@ public:
return SI->second;
}
};
-}
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_OBJECT_RECORDSTREAMER_H
diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp
index fc1dca35424e..9f3486e58a11 100644
--- a/lib/Object/WasmObjectFile.cpp
+++ b/lib/Object/WasmObjectFile.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/ObjectFile.h"
@@ -22,7 +23,9 @@
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Wasm.h"
#include <algorithm>
+#include <cassert>
#include <cstdint>
+#include <cstring>
#include <system_error>
using namespace llvm;
@@ -141,7 +144,7 @@ static Error readInitExpr(wasm::WasmInitExpr &Expr, const uint8_t *&Ptr) {
Expr.Value.Float64 = readFloat64(Ptr);
break;
case wasm::WASM_OPCODE_GET_GLOBAL:
- Expr.Value.Global = readUint32(Ptr);
+ Expr.Value.Global = readULEB128(Ptr);
break;
default:
return make_error<GenericBinaryError>("Invalid opcode in init_expr",
@@ -180,7 +183,7 @@ static Error readSection(WasmSection &Section, const uint8_t *&Ptr,
}
WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
- : ObjectFile(Binary::ID_Wasm, Buffer), StartFunction(-1) {
+ : ObjectFile(Binary::ID_Wasm, Buffer) {
ErrorAsOutParameter ErrAsOutParam(&Err);
Header.Magic = getData().substr(0, 4);
if (Header.Magic != StringRef("\0asm", 4)) {
@@ -252,7 +255,7 @@ Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) {
while (Count--) {
/*uint32_t Index =*/readVaruint32(Ptr);
StringRef Name = readString(Ptr);
- if (Name.size())
+ if (!Name.empty())
Symbols.emplace_back(Name,
WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME);
}
@@ -313,11 +316,12 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr,
case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
+ case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB:
break;
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB:
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
- Reloc.Addend = readVaruint32(Ptr);
+ Reloc.Addend = readVarint32(Ptr);
break;
default:
return make_error<GenericBinaryError>("Bad relocation type",
diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp
index 3e1bed19d61f..9b1ff7e5dc16 100644
--- a/lib/ObjectYAML/WasmYAML.cpp
+++ b/lib/ObjectYAML/WasmYAML.cpp
@@ -223,7 +223,7 @@ void MappingTraits<WasmYAML::Relocation>::mapping(
IO.mapRequired("Type", Relocation.Type);
IO.mapRequired("Index", Relocation.Index);
IO.mapRequired("Offset", Relocation.Offset);
- IO.mapRequired("Addend", Relocation.Addend);
+ IO.mapOptional("Addend", Relocation.Addend, 0);
}
void MappingTraits<WasmYAML::LocalDecl>::mapping(
@@ -294,6 +294,9 @@ void MappingTraits<wasm::WasmInitExpr>::mapping(IO &IO,
case wasm::WASM_OPCODE_F64_CONST:
IO.mapRequired("Value", Expr.Value.Float64);
break;
+ case wasm::WASM_OPCODE_GET_GLOBAL:
+ IO.mapRequired("Index", Expr.Value.Global);
+ break;
}
}
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 0421946a32a6..55ac2541948e 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -624,6 +624,10 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
// And finally clean up LCSSA form before generating code.
OptimizePM.addPass(InstSimplifierPass());
+ // LoopSink (and other loop passes since the last simplifyCFG) might have
+ // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
+ OptimizePM.addPass(SimplifyCFGPass());
+
// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index c4c892f0352a..e1e2c22e1df1 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -3393,7 +3393,7 @@ namespace {
}
void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
- unsigned FormatMaxPadding) const {
+ unsigned FormatMaxPadding, bool TruncateZero) const {
switch (category) {
case fcInfinity:
if (isNegative())
@@ -3407,9 +3407,16 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
if (isNegative())
Str.push_back('-');
- if (!FormatMaxPadding)
- append(Str, "0.0E+0");
- else
+ if (!FormatMaxPadding) {
+ if (TruncateZero)
+ append(Str, "0.0E+0");
+ else {
+ append(Str, "0.0");
+ if (FormatPrecision > 1)
+ Str.append(FormatPrecision - 1, '0');
+ append(Str, "e+00");
+ }
+ } else
Str.push_back('0');
return;
@@ -3543,12 +3550,16 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
Str.push_back(buffer[NDigits-1]);
Str.push_back('.');
- if (NDigits == 1)
+ if (NDigits == 1 && TruncateZero)
Str.push_back('0');
else
for (unsigned I = 1; I != NDigits; ++I)
Str.push_back(buffer[NDigits-1-I]);
- Str.push_back('E');
+ // Fill with zeros up to FormatPrecision.
+ if (!TruncateZero && FormatPrecision > NDigits - 1)
+ Str.append(FormatPrecision - NDigits + 1, '0');
+  // For !TruncateZero we use a lowercase 'e'.
+ Str.push_back(TruncateZero ? 'E' : 'e');
Str.push_back(exp >= 0 ? '+' : '-');
if (exp < 0) exp = -exp;
@@ -3557,6 +3568,9 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
expbuf.push_back((char) ('0' + (exp % 10)));
exp /= 10;
} while (exp);
+ // Exponent always at least two digits if we do not truncate zeros.
+ if (!TruncateZero && expbuf.size() < 2)
+ expbuf.push_back('0');
for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
Str.push_back(expbuf[E-1-I]);
return;
@@ -4362,10 +4376,11 @@ bool DoubleAPFloat::isInteger() const {
void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
unsigned FormatPrecision,
- unsigned FormatMaxPadding) const {
+ unsigned FormatMaxPadding,
+ bool TruncateZero) const {
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
- .toString(Str, FormatPrecision, FormatMaxPadding);
+ .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
}
bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
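The new TruncateZero parameter to APFloat::toString selects between the compact historic form and a padded, printf-like form with a fixed number of fraction digits, a lowercase 'e', and at least a two-digit exponent; the AsmWriter change earlier in this diff relies on the padded form when it reparses the printed constant. A hedged sketch of the call, assuming TruncateZero defaults to true and treating the example output strings as illustrative only:

  #include "llvm/ADT/APFloat.h"
  #include "llvm/ADT/SmallString.h"
  using namespace llvm;

  void printBothForms() {
    APFloat F(2.5);
    SmallString<32> Compact, Padded;
    F.toString(Compact, /*FormatPrecision=*/6, /*FormatMaxPadding=*/0);
    F.toString(Padded, /*FormatPrecision=*/6, /*FormatMaxPadding=*/0,
               /*TruncateZero=*/false);
    // Compact is of the form "2.5E+0"; Padded of the form "2.500000e+00".
  }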
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 2d049a1cff85..1227d7528c8f 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -81,7 +81,7 @@ void APInt::initSlowCase(uint64_t val, bool isSigned) {
pVal[0] = val;
if (isSigned && int64_t(val) < 0)
for (unsigned i = 1; i < getNumWords(); ++i)
- pVal[i] = -1ULL;
+ pVal[i] = WORD_MAX;
clearUnusedBits();
}
@@ -364,44 +364,20 @@ bool APInt::EqualSlowCase(const APInt& RHS) const {
return std::equal(pVal, pVal + getNumWords(), RHS.pVal);
}
-bool APInt::ult(const APInt& RHS) const {
+int APInt::compare(const APInt& RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
if (isSingleWord())
- return VAL < RHS.VAL;
+ return VAL < RHS.VAL ? -1 : VAL > RHS.VAL;
- // Get active bit length of both operands
- unsigned n1 = getActiveBits();
- unsigned n2 = RHS.getActiveBits();
-
- // If magnitude of LHS is less than RHS, return true.
- if (n1 < n2)
- return true;
-
- // If magnitude of RHS is greater than LHS, return false.
- if (n2 < n1)
- return false;
-
- // If they both fit in a word, just compare the low order word
- if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
- return pVal[0] < RHS.pVal[0];
-
- // Otherwise, compare all words
- unsigned topWord = whichWord(std::max(n1,n2)-1);
- for (int i = topWord; i >= 0; --i) {
- if (pVal[i] > RHS.pVal[i])
- return false;
- if (pVal[i] < RHS.pVal[i])
- return true;
- }
- return false;
+ return tcCompare(pVal, RHS.pVal, getNumWords());
}
-bool APInt::slt(const APInt& RHS) const {
+int APInt::compareSigned(const APInt& RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
if (isSingleWord()) {
int64_t lhsSext = SignExtend64(VAL, BitWidth);
int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth);
- return lhsSext < rhsSext;
+ return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
}
bool lhsNeg = isNegative();
@@ -409,11 +385,11 @@ bool APInt::slt(const APInt& RHS) const {
// If the sign bits don't match, then (LHS < RHS) if LHS is negative
if (lhsNeg != rhsNeg)
- return lhsNeg;
+ return lhsNeg ? -1 : 1;
// Otherwise we can just use an unsigned comparison, because even negative
// numbers compare correctly this way if both have the same signed-ness.
- return ult(RHS);
+ return tcCompare(pVal, RHS.pVal, getNumWords());
}
void APInt::setBit(unsigned bitPosition) {
@@ -428,13 +404,13 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
unsigned hiWord = whichWord(hiBit);
// Create an initial mask for the low word with zeros below loBit.
- uint64_t loMask = UINT64_MAX << whichBit(loBit);
+ uint64_t loMask = WORD_MAX << whichBit(loBit);
// If hiBit is not aligned, we need a high mask.
unsigned hiShiftAmt = whichBit(hiBit);
if (hiShiftAmt != 0) {
// Create a high mask with zeros above hiBit.
- uint64_t hiMask = UINT64_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
+ uint64_t hiMask = WORD_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
// If loWord and hiWord are equal, then we combine the masks. Otherwise,
// set the bits in hiWord.
if (hiWord == loWord)
@@ -447,7 +423,7 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
// Fill any words between loWord and hiWord with all ones.
for (unsigned word = loWord + 1; word < hiWord; ++word)
- pVal[word] = UINT64_MAX;
+ pVal[word] = WORD_MAX;
}
/// Set the given bit to 0 whose position is given as "bitPosition".
@@ -487,7 +463,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
// Single word result can be done as a direct bitmask.
if (isSingleWord()) {
- uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
+ uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
VAL &= ~(mask << bitPosition);
VAL |= (subBits.VAL << bitPosition);
return;
@@ -499,7 +475,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
// Insertion within a single word can be done as a direct bitmask.
if (loWord == hi1Word) {
- uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
+ uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
pVal[loWord] &= ~(mask << loBit);
pVal[loWord] |= (subBits.VAL << loBit);
return;
@@ -515,7 +491,7 @@ void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
// Mask+insert remaining bits.
unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
if (remainingBits != 0) {
- uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - remainingBits);
+ uint64_t mask = WORD_MAX >> (APINT_BITS_PER_WORD - remainingBits);
pVal[hi1Word] &= ~mask;
pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
}
@@ -682,7 +658,7 @@ unsigned APInt::countLeadingOnes() const {
unsigned Count = llvm::countLeadingOnes(pVal[i] << shift);
if (Count == highWordBits) {
for (i--; i >= 0; --i) {
- if (pVal[i] == -1ULL)
+ if (pVal[i] == WORD_MAX)
Count += APINT_BITS_PER_WORD;
else {
Count += llvm::countLeadingOnes(pVal[i]);
@@ -708,11 +684,12 @@ unsigned APInt::countTrailingZeros() const {
unsigned APInt::countTrailingOnesSlowCase() const {
unsigned Count = 0;
unsigned i = 0;
- for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
+ for (; i < getNumWords() && pVal[i] == WORD_MAX; ++i)
Count += APINT_BITS_PER_WORD;
if (i < getNumWords())
Count += llvm::countTrailingOnes(pVal[i]);
- return std::min(Count, BitWidth);
+ assert(Count <= BitWidth);
+ return Count;
}
unsigned APInt::countPopulationSlowCase() const {
@@ -962,43 +939,26 @@ APInt APInt::trunc(unsigned width) const {
}
// Sign extend to a new width.
-APInt APInt::sext(unsigned width) const {
- assert(width > BitWidth && "Invalid APInt SignExtend request");
+APInt APInt::sext(unsigned Width) const {
+ assert(Width > BitWidth && "Invalid APInt SignExtend request");
- if (width <= APINT_BITS_PER_WORD) {
- uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
- val = (int64_t)val >> (width - BitWidth);
- return APInt(width, val >> (APINT_BITS_PER_WORD - width));
- }
-
- APInt Result(getMemory(getNumWords(width)), width);
-
- // Copy full words.
- unsigned i;
- uint64_t word = 0;
- for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
- word = getRawData()[i];
- Result.pVal[i] = word;
- }
+ if (Width <= APINT_BITS_PER_WORD)
+ return APInt(Width, SignExtend64(VAL, BitWidth));
- // Read and sign-extend any partial word.
- unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
- if (bits != 0)
- word = (int64_t)getRawData()[i] << bits >> bits;
- else
- word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
+ APInt Result(getMemory(getNumWords(Width)), Width);
- // Write remaining full words.
- for (; i != width / APINT_BITS_PER_WORD; i++) {
- Result.pVal[i] = word;
- word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
- }
+ // Copy words.
+ std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
- // Write any partial word.
- bits = (0 - width) % APINT_BITS_PER_WORD;
- if (bits != 0)
- Result.pVal[i] = word << bits >> bits;
+ // Sign extend the last word since there may be unused bits in the input.
+ Result.pVal[getNumWords() - 1] =
+ SignExtend64(Result.pVal[getNumWords() - 1],
+ ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
+ // Fill with sign bits.
+ std::memset(Result.pVal + getNumWords(), isNegative() ? -1 : 0,
+ (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
+ Result.clearUnusedBits();
return Result;
}
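
The rewritten sext (and ashrSlowCase further down) lean on llvm::SignExtend64 to make the top word carry the sign. A toy stand-in, assuming the usual two's-complement behaviour, only to show what that helper computes; signExtend64 here is illustrative, not the LLVM function:

#include <cassert>
#include <cstdint>

// Sign-extend the low B bits of X to 64 bits, B in [1, 64].
static int64_t signExtend64(uint64_t X, unsigned B) {
  assert(B > 0 && B <= 64);
  return int64_t(X << (64 - B)) >> (64 - B);
}

int main() {
  assert(signExtend64(0xF, 4) == -1);  // 0b1111 as a 4-bit value is -1
  assert(signExtend64(0x7, 4) == 7);   // sign bit clear: value unchanged
  assert(uint64_t(signExtend64(0x80, 8)) == 0xFFFFFFFFFFFFFF80ULL);
}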
@@ -1012,12 +972,11 @@ APInt APInt::zext(unsigned width) const {
APInt Result(getMemory(getNumWords(width)), width);
// Copy words.
- unsigned i;
- for (i = 0; i != getNumWords(); i++)
- Result.pVal[i] = getRawData()[i];
+ std::memcpy(Result.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
// Zero remaining words.
- memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
+ std::memset(Result.pVal + getNumWords(), 0,
+ (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
return Result;
}
@@ -1052,89 +1011,51 @@ APInt APInt::sextOrSelf(unsigned width) const {
/// Arithmetic right-shift this APInt by shiftAmt.
/// @brief Arithmetic right-shift function.
-APInt APInt::ashr(const APInt &shiftAmt) const {
- return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
+void APInt::ashrInPlace(const APInt &shiftAmt) {
+ ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
}
/// Arithmetic right-shift this APInt by shiftAmt.
/// @brief Arithmetic right-shift function.
-APInt APInt::ashr(unsigned shiftAmt) const {
- assert(shiftAmt <= BitWidth && "Invalid shift amount");
- // Handle a degenerate case
- if (shiftAmt == 0)
- return *this;
-
- // Handle single word shifts with built-in ashr
- if (isSingleWord()) {
- if (shiftAmt == BitWidth)
- return APInt(BitWidth, 0); // undefined
- return APInt(BitWidth, SignExtend64(VAL, BitWidth) >> shiftAmt);
- }
+void APInt::ashrSlowCase(unsigned ShiftAmt) {
+ // Don't bother performing a no-op shift.
+ if (!ShiftAmt)
+ return;
- // If all the bits were shifted out, the result is, technically, undefined.
- // We return -1 if it was negative, 0 otherwise. We check this early to avoid
- // issues in the algorithm below.
- if (shiftAmt == BitWidth) {
- if (isNegative())
- return APInt(BitWidth, -1ULL, true);
- else
- return APInt(BitWidth, 0);
- }
-
- // Create some space for the result.
- uint64_t * val = new uint64_t[getNumWords()];
-
- // Compute some values needed by the following shift algorithms
- unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
- unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
- unsigned breakWord = getNumWords() - 1 - offset; // last word affected
- unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
- if (bitsInWord == 0)
- bitsInWord = APINT_BITS_PER_WORD;
-
- // If we are shifting whole words, just move whole words
- if (wordShift == 0) {
- // Move the words containing significant bits
- for (unsigned i = 0; i <= breakWord; ++i)
- val[i] = pVal[i+offset]; // move whole word
-
- // Adjust the top significant word for sign bit fill, if negative
- if (isNegative())
- if (bitsInWord < APINT_BITS_PER_WORD)
- val[breakWord] |= ~0ULL << bitsInWord; // set high bits
- } else {
- // Shift the low order words
- for (unsigned i = 0; i < breakWord; ++i) {
- // This combines the shifted corresponding word with the low bits from
- // the next word (shifted into this word's high bits).
- val[i] = (pVal[i+offset] >> wordShift) |
- (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
- }
+ // Save the original sign bit for later.
+ bool Negative = isNegative();
- // Shift the break word. In this case there are no bits from the next word
- // to include in this word.
- val[breakWord] = pVal[breakWord+offset] >> wordShift;
-
- // Deal with sign extension in the break word, and possibly the word before
- // it.
- if (isNegative()) {
- if (wordShift > bitsInWord) {
- if (breakWord > 0)
- val[breakWord-1] |=
- ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
- val[breakWord] |= ~0ULL;
- } else
- val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
+ // WordShift is the inter-part shift; BitShift is the intra-part shift.
+ unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
+ unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
+
+ unsigned WordsToMove = getNumWords() - WordShift;
+ if (WordsToMove != 0) {
+ // Sign extend the last word to fill in the unused bits.
+ pVal[getNumWords() - 1] = SignExtend64(
+ pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
+
+ // Fastpath for moving by whole words.
+ if (BitShift == 0) {
+ std::memmove(pVal, pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
+ } else {
+ // Move the words containing significant bits.
+ for (unsigned i = 0; i != WordsToMove - 1; ++i)
+ pVal[i] = (pVal[i + WordShift] >> BitShift) |
+ (pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
+
+ // Handle the last word which has no high bits to copy.
+ pVal[WordsToMove - 1] = pVal[WordShift + WordsToMove - 1] >> BitShift;
+ // Sign extend one more time.
+ pVal[WordsToMove - 1] =
+ SignExtend64(pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
}
}
- // Remaining words are 0 or -1, just assign them.
- uint64_t fillValue = (isNegative() ? -1ULL : 0);
- for (unsigned i = breakWord+1; i < getNumWords(); ++i)
- val[i] = fillValue;
- APInt Result(val, BitWidth);
- Result.clearUnusedBits();
- return Result;
+ // Fill in the remainder based on the original sign.
+ std::memset(pVal + WordsToMove, Negative ? -1 : 0,
+ WordShift * APINT_WORD_SIZE);
+ clearUnusedBits();
}
/// Logical right-shift this APInt by shiftAmt.
@@ -2608,7 +2529,7 @@ void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
if (!Count)
return;
- /* WordShift is the inter-part shift; BitShift is is intra-part shift. */
+ // WordShift is the inter-part shift; BitShift is the intra-part shift.
unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
unsigned BitShift = Count % APINT_BITS_PER_WORD;
@@ -2635,7 +2556,7 @@ void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
if (!Count)
return;
- // WordShift is the inter-part shift; BitShift is is intra-part shift.
+ // WordShift is the inter-part shift; BitShift is the intra-part shift.
unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
unsigned BitShift = Count % APINT_BITS_PER_WORD;
@@ -2684,10 +2605,8 @@ int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
unsigned parts) {
while (parts) {
parts--;
- if (lhs[parts] == rhs[parts])
- continue;
-
- return (lhs[parts] > rhs[parts]) ? 1 : -1;
+ if (lhs[parts] != rhs[parts])
+ return (lhs[parts] > rhs[parts]) ? 1 : -1;
}
return 0;
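
For reference, a minimal sketch of the word-level arithmetic right shift that ashrSlowCase implements, under the simplifying assumption that the bit width is a multiple of 64 (so there is no partial top word to sign-extend first); ashrWords and its parameters are illustrative, not the patch's API:

#include <cstdint>
#include <cstring>

// Arithmetic right shift of a little-endian multi-word integer.
// Preconditions: Words > 0 and Shift < Words * 64.
static void ashrWords(uint64_t *Val, unsigned Words, unsigned Shift) {
  const unsigned BitsPerWord = 64;
  bool Negative = (Val[Words - 1] >> (BitsPerWord - 1)) != 0;
  unsigned WordShift = Shift / BitsPerWord; // whole-word part of the shift
  unsigned BitShift = Shift % BitsPerWord;  // intra-word part of the shift
  unsigned WordsToMove = Words - WordShift;

  if (BitShift == 0) {
    // Fast path: move whole words only.
    std::memmove(Val, Val + WordShift, WordsToMove * sizeof(uint64_t));
  } else {
    // Each result word combines two adjacent source words.
    for (unsigned i = 0; i != WordsToMove - 1; ++i)
      Val[i] = (Val[i + WordShift] >> BitShift) |
               (Val[i + WordShift + 1] << (BitsPerWord - BitShift));
    // Last moved word: shift in sign bits from the top.
    uint64_t Top = Val[Words - 1] >> BitShift;
    if (Negative)
      Top |= ~uint64_t(0) << (BitsPerWord - BitShift);
    Val[WordsToMove - 1] = Top;
  }
  // Fill the vacated high words with the sign.
  std::memset(Val + WordsToMove, Negative ? -1 : 0,
              WordShift * sizeof(uint64_t));
}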
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index 92ce6185306a..22fb3f2cb9c9 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -193,4 +193,3 @@ void *LLVMSearchForAddressOfSymbol(const char *symbolName) {
void LLVMAddSymbol(const char *symbolName, void *symbolValue) {
return llvm::sys::DynamicLibrary::AddSymbol(symbolName, symbolValue);
}
-
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 64d5977e2ebd..f3a654d7d2bd 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -161,6 +161,7 @@ StringRef Triple::getVendorTypeName(VendorType Kind) {
case Myriad: return "myriad";
case AMD: return "amd";
case Mesa: return "mesa";
+ case SUSE: return "suse";
}
llvm_unreachable("Invalid VendorType!");
@@ -443,6 +444,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
.Case("myriad", Triple::Myriad)
.Case("amd", Triple::AMD)
.Case("mesa", Triple::Mesa)
+ .Case("suse", Triple::SUSE)
.Default(Triple::UnknownVendor);
}
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index d0c0956b87ca..629ad5c61b78 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -942,6 +942,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
AArch64::XZR, NextMBBI);
case AArch64::CMP_SWAP_128:
return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
+
}
return false;
}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 550174b22a89..dc916c034661 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1125,7 +1125,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
if (RegInfo->hasBasePointer(MF))
BasePointerReg = RegInfo->getBaseRegister();
- bool ExtraCSSpill = false;
+ unsigned ExtraCSSpill = 0;
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
@@ -1153,7 +1153,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(PairedReg);
if (AArch64::GPR64RegClass.contains(PairedReg) &&
!RegInfo->isReservedReg(MF, PairedReg))
- ExtraCSSpill = true;
+ ExtraCSSpill = PairedReg;
}
}
@@ -1186,8 +1186,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// register scavenging. If we already spilled an extra callee-saved register
// above to keep the number of spills even, we don't need to do anything else
// here.
- if (BigStack && !ExtraCSSpill) {
- if (UnspilledCSGPR != AArch64::NoRegister) {
+ if (BigStack) {
+ if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
<< " to get a scratch register.\n");
SavedRegs.set(UnspilledCSGPR);
@@ -1196,15 +1196,18 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// store the pair.
if (produceCompactUnwindFrame(MF))
SavedRegs.set(UnspilledCSGPRPaired);
- ExtraCSSpill = true;
+ ExtraCSSpill = UnspilledCSGPRPaired;
NumRegsSpilled = SavedRegs.count();
}
// If we didn't find an extra callee-saved register to spill, create
// an emergency spill slot.
- if (!ExtraCSSpill) {
- const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
- int FI = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), false);
+ if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const TargetRegisterClass &RC = AArch64::GPR64RegClass;
+ unsigned Size = TRI->getSpillSize(RC);
+ unsigned Align = TRI->getSpillAlignment(RC);
+ int FI = MFI.CreateStackObject(Size, Align, false);
RS->addScavengingFrameIndex(FI);
DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
<< " as the emergency spill slot.\n");
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4ddc95199d4c..a7c98fbb425f 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -91,6 +91,7 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
+STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
static cl::opt<bool>
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
@@ -105,6 +106,12 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
+static cl::opt<bool>
+EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
+ cl::desc("Enable AArch64 logical imm instruction "
+ "optimization"),
+ cl::init(true));
+
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
@@ -787,6 +794,140 @@ EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
return VT.changeVectorElementTypeToInteger();
}
+static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
+ const APInt &Demanded,
+ TargetLowering::TargetLoweringOpt &TLO,
+ unsigned NewOpc) {
+ uint64_t OldImm = Imm, NewImm, Enc;
+ uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
+
+ // Return if the immediate is already all zeros, all ones, a bimm32 or a
+ // bimm64.
+ if (Imm == 0 || Imm == Mask ||
+ AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
+ return false;
+
+ unsigned EltSize = Size;
+ uint64_t DemandedBits = Demanded.getZExtValue();
+
+ // Clear bits that are not demanded.
+ Imm &= DemandedBits;
+
+ while (true) {
+ // The goal here is to set the non-demanded bits in a way that minimizes
+ // the number of transitions between 0 and 1. To achieve this goal,
+ // we set the non-demanded bits to the value of the preceding demanded bits.
+ // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
+ // non-demanded bit), we copy bit0 (1) to the least significant 'x',
+ // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
+ // The final result is 0b11000011.
+ uint64_t NonDemandedBits = ~DemandedBits;
+ uint64_t InvertedImm = ~Imm & DemandedBits;
+ uint64_t RotatedImm =
+ ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
+ NonDemandedBits;
+ uint64_t Sum = RotatedImm + NonDemandedBits;
+ bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
+ uint64_t Ones = (Sum + Carry) & NonDemandedBits;
+ NewImm = (Imm | Ones) & Mask;
+
+ // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
+ // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
+ // we halve the element size and continue the search.
+ if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
+ break;
+
+ // We cannot shrink the element size any further if it is 2 bits.
+ if (EltSize == 2)
+ return false;
+
+ EltSize /= 2;
+ Mask >>= EltSize;
+ uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
+
+ // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
+ if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
+ return false;
+
+ // Merge the upper and lower halves of Imm and DemandedBits.
+ Imm |= Hi;
+ DemandedBits |= DemandedBitsHi;
+ }
+
+ ++NumOptimizedImms;
+
+ // Replicate the element across the register width.
+ while (EltSize < Size) {
+ NewImm |= NewImm << EltSize;
+ EltSize *= 2;
+ }
+
+ (void)OldImm;
+ assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
+ "demanded bits should never be altered");
+ assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
+
+ // Create the new constant immediate node.
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ // If the new constant immediate is all-zeros or all-ones, let the target
+ // independent DAG combine optimize this node.
+ if (NewImm == 0 || NewImm == OrigMask)
+ return TLO.CombineTo(Op.getOperand(1), TLO.DAG.getConstant(NewImm, DL, VT));
+
+ // Otherwise, create a machine node so that target independent DAG combine
+ // doesn't undo this optimization.
+ Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
+ SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
+ SDValue New(
+ TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
+
+ return TLO.CombineTo(Op, New);
+}
+
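
To make the comment's 0bx10xx0x1 example concrete, here is a brute-force sketch of the stated goal (fill each non-demanded bit from the nearest demanded bit below it, wrapping around from the top). It is not the carry-based trick the patch actually uses; fillNonDemandedBits and its encoding of the example are illustrative assumptions:

#include <cassert>
#include <cstdint>

static uint64_t fillNonDemandedBits(uint64_t Imm, uint64_t Demanded,
                                    unsigned Bits) {
  uint64_t Result = Imm & Demanded;
  for (unsigned i = 0; i != Bits; ++i) {
    if (Demanded & (1ULL << i))
      continue;
    // Walk downward (with wrap-around) to the nearest demanded bit and
    // copy its value into this non-demanded position.
    for (unsigned d = 1; d != Bits; ++d) {
      unsigned j = (i + Bits - d) % Bits;
      if (Demanded & (1ULL << j)) {
        if (Result & (1ULL << j))
          Result |= 1ULL << i;
        break;
      }
    }
  }
  return Result;
}

int main() {
  // Imm = 0b01000001 with demanded mask 0b01100101 is the comment's
  // 0bx10xx0x1; the fill produces 0b11000011.
  assert(fillNonDemandedBits(0x41, 0x65, 8) == 0xC3);
}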
+bool AArch64TargetLowering::targetShrinkDemandedConstant(
+ SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
+ // Delay this optimization to as late as possible.
+ if (!TLO.LegalOps)
+ return false;
+
+ if (!EnableOptimizeLogicalImm)
+ return false;
+
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return false;
+
+ unsigned Size = VT.getSizeInBits();
+ assert((Size == 32 || Size == 64) &&
+ "i32 or i64 is expected after legalization.");
+
+ // Exit early if we demand all bits.
+ if (Demanded.countPopulation() == Size)
+ return false;
+
+ unsigned NewOpc;
+ switch (Op.getOpcode()) {
+ default:
+ return false;
+ case ISD::AND:
+ NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
+ break;
+ case ISD::OR:
+ NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
+ break;
+ case ISD::XOR:
+ NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
+ break;
+ }
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!C)
+ return false;
+ uint64_t Imm = C->getZExtValue();
+ return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
+}
+
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
@@ -3418,11 +3559,75 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Other Lowering Code
//===----------------------------------------------------------------------===//
+SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flag) const {
+ return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
+}
+
+SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flag) const {
+ return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
+}
+
+SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flag) const {
+ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
+ N->getOffset(), Flag);
+}
+
+SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flag) const {
+ return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
+}
+
+// (loadGOT sym)
+template <class NodeTy>
+SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG) const {
+ DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT);
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes instead of using a wrapper node.
+ return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
+}
+
+// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
+template <class NodeTy>
+SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG)
+ const {
+ DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ const unsigned char MO_NC = AArch64II::MO_NC;
+ return DAG.getNode(
+ AArch64ISD::WrapperLarge, DL, Ty,
+ getTargetNode(N, Ty, DAG, AArch64II::MO_G3),
+ getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC),
+ getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC),
+ getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC));
+}
+
+// (addlow (adrp %hi(sym)) %lo(sym))
+template <class NodeTy>
+SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const {
+ DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE);
+ SDValue Lo = getTargetNode(N, Ty, DAG,
+ AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
+ return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
+}
+
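
The refactor above folds several near-identical lowering paths into one template per addressing form, dispatching on the node kind through the overloaded getTargetNode helpers. A toy sketch of that pattern; the types and strings here are made up purely for illustration:

#include <iostream>
#include <string>

struct GlobalNode { std::string Name; };
struct JumpTableNode { int Index; };

static std::string getTargetNode(const GlobalNode &N) { return N.Name; }
static std::string getTargetNode(const JumpTableNode &N) {
  return "JTI" + std::to_string(N.Index);
}

// One template handles every node kind; overload resolution picks the helper.
template <class NodeTy> static std::string getAddr(const NodeTy &N) {
  return "adrp+add(" + getTargetNode(N) + ")";
}

int main() {
  std::cout << getAddr(GlobalNode{"foo"}) << "\n";   // adrp+add(foo)
  std::cout << getAddr(JumpTableNode{42}) << "\n";   // adrp+add(JTI42)
}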
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDLoc DL(Op);
- const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+ GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
unsigned char OpFlags =
Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
@@ -3430,32 +3635,15 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
"unexpected offset in global node");
- // This also catched the large code model case for Darwin.
+ // This also catches the large code model case for Darwin.
if ((OpFlags & AArch64II::MO_GOT) != 0) {
- SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
- // FIXME: Once remat is capable of dealing with instructions with register
- // operands, expand this into two nodes instead of using a wrapper node.
- return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
+ return getGOT(GN, DAG);
}
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
- const unsigned char MO_NC = AArch64II::MO_NC;
- return DAG.getNode(
- AArch64ISD::WrapperLarge, DL, PtrVT,
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3),
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC),
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC),
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC));
+ return getAddrLarge(GN, DAG);
} else {
- // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and
- // the only correct model on Darwin.
- SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
- OpFlags | AArch64II::MO_PAGE);
- unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
- SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags);
-
- SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
- return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+ return getAddr(GN, DAG);
}
}
@@ -4232,90 +4420,37 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
// Jump table entries as PC relative offsets. No additional tweaking
// is necessary here. Just get the address of the jump table.
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDLoc DL(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
- const unsigned char MO_NC = AArch64II::MO_NC;
- return DAG.getNode(
- AArch64ISD::WrapperLarge, DL, PtrVT,
- DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3),
- DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC),
- DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC),
- DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
- AArch64II::MO_G0 | MO_NC));
+ return getAddrLarge(JT, DAG);
}
-
- SDValue Hi =
- DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE);
- SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
- AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
- return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+ return getAddr(JT, DAG);
}
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDLoc DL(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
// Use the GOT for the large code model on iOS.
if (Subtarget->isTargetMachO()) {
- SDValue GotAddr = DAG.getTargetConstantPool(
- CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
- AArch64II::MO_GOT);
- return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
+ return getGOT(CP, DAG);
}
-
- const unsigned char MO_NC = AArch64II::MO_NC;
- return DAG.getNode(
- AArch64ISD::WrapperLarge, DL, PtrVT,
- DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
- CP->getOffset(), AArch64II::MO_G3),
- DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
- CP->getOffset(), AArch64II::MO_G2 | MO_NC),
- DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
- CP->getOffset(), AArch64II::MO_G1 | MO_NC),
- DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
- CP->getOffset(), AArch64II::MO_G0 | MO_NC));
+ return getAddrLarge(CP, DAG);
} else {
- // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on
- // ELF, the only valid one on Darwin.
- SDValue Hi =
- DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
- CP->getOffset(), AArch64II::MO_PAGE);
- SDValue Lo = DAG.getTargetConstantPool(
- CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
- AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
- SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
- return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+ return getAddr(CP, DAG);
}
}
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
- const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDLoc DL(Op);
+ BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
- const unsigned char MO_NC = AArch64II::MO_NC;
- return DAG.getNode(
- AArch64ISD::WrapperLarge, DL, PtrVT,
- DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3),
- DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC),
- DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC),
- DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC));
+ return getAddrLarge(BA, DAG);
} else {
- SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE);
- SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF |
- AArch64II::MO_NC);
- SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
- return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+ return getAddr(BA, DAG);
}
}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index a023b4373835..6081b07479b9 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -255,6 +255,9 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
+ bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+ TargetLoweringOpt &TLO) const override;
+
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
/// Returns true if the target allows unaligned memory accesses of the
@@ -508,6 +511,18 @@ private:
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
+ SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
+ SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
+ SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
+ SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
+ template <class NodeTy> SDValue getGOT(NodeTy *N, SelectionDAG &DAG) const;
+ template <class NodeTy>
+ SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG) const;
+ template <class NodeTy> SDValue getAddr(NodeTy *N, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td
index 867074c3c374..71826bec6b11 100644
--- a/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -14,6 +14,9 @@
//===----------------------------------
// Atomic fences
//===----------------------------------
+let AddedComplexity = 15, Size = 0 in
+def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering),
+ [(atomic_fence imm:$ordering, 0)]>, Sched<[]>;
def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 16be4432b160..c44daf306ea9 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -693,11 +693,11 @@ def addsub_shifted_imm32_neg : addsub_shifted_imm_neg<i32>;
def addsub_shifted_imm64_neg : addsub_shifted_imm_neg<i64>;
def gi_addsub_shifted_imm32 :
- GIComplexOperandMatcher<s32, (ops i32imm, i32imm), "selectArithImmed">,
+ GIComplexOperandMatcher<s32, "selectArithImmed">,
GIComplexPatternEquiv<addsub_shifted_imm32>;
def gi_addsub_shifted_imm64 :
- GIComplexOperandMatcher<s64, (ops i32imm, i32imm), "selectArithImmed">,
+ GIComplexOperandMatcher<s64, "selectArithImmed">,
GIComplexPatternEquiv<addsub_shifted_imm64>;
class neg_addsub_shifted_imm<ValueType Ty>
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 41fc8eceab5c..cb268828455e 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2320,7 +2320,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
unsigned Opc = 0;
bool Offset = true;
- switch (RC->getSize()) {
+ switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
Opc = AArch64::STRBui;
@@ -2424,7 +2424,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
unsigned Opc = 0;
bool Offset = true;
- switch (RC->getSize()) {
+ switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRBui;
@@ -2649,7 +2649,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
};
if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
- assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() &&
+ assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
+ TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
"Mismatched register size in non subreg COPY");
if (IsSpill)
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
@@ -2735,7 +2736,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
}
if (FillRC) {
- assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() &&
+ assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
+ TRI.getRegSizeInBits(*FillRC) &&
"Mismatched regclass size on folded subreg COPY");
loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
MachineInstr &LoadMI = *--InsertPt;
@@ -3025,7 +3027,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
return false;
}
-void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
NopInst.setOpcode(AArch64::HINT);
NopInst.addOperand(MCOperand::createImm(0));
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index bacce441f6c5..4cd14db633b9 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -205,7 +205,7 @@ public:
const DebugLoc &DL, unsigned DstReg,
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
unsigned FalseReg) const override;
- void getNoopForMachoTarget(MCInst &NopInst) const override;
+ void getNoop(MCInst &NopInst) const override;
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 5e01b6cd2b46..b0e0e3eb4ba7 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -41,12 +41,17 @@ using namespace llvm;
namespace {
+#define GET_GLOBALISEL_PREDICATE_BITSET
+#include "AArch64GenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATE_BITSET
+
class AArch64InstructionSelector : public InstructionSelector {
public:
AArch64InstructionSelector(const AArch64TargetMachine &TM,
const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI);
+ void beginFunction(const MachineFunction &MF) override;
bool select(MachineInstr &I) const override;
private:
@@ -62,14 +67,19 @@ private:
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
- bool selectArithImmed(MachineOperand &Root, MachineOperand &Result1,
- MachineOperand &Result2) const;
+ ComplexRendererFn selectArithImmed(MachineOperand &Root) const;
const AArch64TargetMachine &TM;
const AArch64Subtarget &STI;
const AArch64InstrInfo &TII;
const AArch64RegisterInfo &TRI;
const AArch64RegisterBankInfo &RBI;
+ bool ForCodeSize;
+
+ PredicateBitset AvailableFeatures;
+ PredicateBitset
+ computeAvailableFeatures(const MachineFunction *MF,
+ const AArch64Subtarget *Subtarget) const;
// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
@@ -88,7 +98,7 @@ AArch64InstructionSelector::AArch64InstructionSelector(
const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI)
: InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI)
+ TRI(*STI.getRegisterInfo()), RBI(RBI), ForCodeSize(), AvailableFeatures()
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
@@ -567,6 +577,12 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
return true;
}
+void AArch64InstructionSelector::beginFunction(
+ const MachineFunction &MF) {
+ ForCodeSize = MF.getFunction()->optForSize();
+ AvailableFeatures = computeAvailableFeatures(&MF, &STI);
+}
+
bool AArch64InstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -1312,9 +1328,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
-bool AArch64InstructionSelector::selectArithImmed(
- MachineOperand &Root, MachineOperand &Result1,
- MachineOperand &Result2) const {
+InstructionSelector::ComplexRendererFn
+AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
MachineInstr &MI = *Root.getParent();
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
@@ -1333,13 +1348,13 @@ bool AArch64InstructionSelector::selectArithImmed(
else if (Root.isReg()) {
MachineInstr *Def = MRI.getVRegDef(Root.getReg());
if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
- return false;
+ return nullptr;
MachineOperand &Op1 = Def->getOperand(1);
if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
- return false;
+ return nullptr;
Immed = Op1.getCImm()->getZExtValue();
} else
- return false;
+ return nullptr;
unsigned ShiftAmt;
@@ -1349,14 +1364,10 @@ bool AArch64InstructionSelector::selectArithImmed(
ShiftAmt = 12;
Immed = Immed >> 12;
} else
- return false;
+ return nullptr;
unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
- Result1.ChangeToImmediate(Immed);
- Result1.clearParent();
- Result2.ChangeToImmediate(ShVal);
- Result2.clearParent();
- return true;
+ return [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed).addImm(ShVal); };
}
namespace llvm {
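
selectArithImmed accepts only immediates encodable as a 12-bit value optionally shifted left by 12. A small standalone check capturing that condition; isArithImmed is an illustrative helper, not part of the patch:

#include <cassert>
#include <cstdint>

// Returns true if Immed fits the arithmetic-immediate form used above:
// a 12-bit value with an optional LSL #12.
static bool isArithImmed(uint64_t Immed, uint64_t &Val, unsigned &Shift) {
  if (Immed < 4096) {
    Val = Immed;
    Shift = 0;
    return true;
  }
  if ((Immed & 0xFFF) == 0 && (Immed >> 12) < 4096) {
    Val = Immed >> 12;
    Shift = 12;
    return true;
  }
  return false;
}

int main() {
  uint64_t Val;
  unsigned Shift;
  assert(isArithImmed(42, Val, Shift) && Shift == 0 && Val == 42);
  assert(isArithImmed(0x5000, Val, Shift) && Shift == 12 && Val == 5);
  assert(!isArithImmed(0x1001, Val, Shift)); // needs more than 12 bits
}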
diff --git a/lib/Target/AArch64/AArch64SchedFalkor.td b/lib/Target/AArch64/AArch64SchedFalkor.td
index eec089087fe0..cf1c0b66db58 100644
--- a/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -79,14 +79,14 @@ def : WriteRes<WriteIM64, [FalkorUnitX]> { let Latency = 5; }
def : WriteRes<WriteBr, [FalkorUnitB]> { let Latency = 1; }
def : WriteRes<WriteBrReg, [FalkorUnitB]> { let Latency = 1; }
def : WriteRes<WriteLD, [FalkorUnitLD]> { let Latency = 3; }
-def : WriteRes<WriteST, [FalkorUnitLD, FalkorUnitST, FalkorUnitSD]>
- { let Latency = 3; let NumMicroOps = 3; }
+def : WriteRes<WriteST, [FalkorUnitST, FalkorUnitSD]>
+ { let Latency = 0; let NumMicroOps = 2; }
def : WriteRes<WriteSTP, [FalkorUnitST, FalkorUnitSD]>
{ let Latency = 0; let NumMicroOps = 2; }
-def : WriteRes<WriteAdr, [FalkorUnitXYZ]> { let Latency = 5; }
+def : WriteRes<WriteAdr, [FalkorUnitXYZ]> { let Latency = 1; }
def : WriteRes<WriteLDIdx, [FalkorUnitLD]> { let Latency = 5; }
-def : WriteRes<WriteSTIdx, [FalkorUnitLD, FalkorUnitST, FalkorUnitSD]>
- { let Latency = 4; let NumMicroOps = 3; }
+def : WriteRes<WriteSTIdx, [FalkorUnitST, FalkorUnitSD]>
+ { let Latency = 0; let NumMicroOps = 2; }
def : WriteRes<WriteF, [FalkorUnitVXVY, FalkorUnitVXVY]>
{ let Latency = 3; let NumMicroOps = 2; }
def : WriteRes<WriteFCmp, [FalkorUnitVXVY]> { let Latency = 2; }
diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index 4bd77d344488..8f8eeef8a6cf 100644
--- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -326,6 +326,10 @@ def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>;
// SIMD Store Instructions
// -----------------------------------------------------------------------------
+def : InstRW<[WriteVST], (instregex "^STP(D|S)(i)$")>;
+def : InstRW<[WriteVST, WriteAdr], (instregex "^STP(D|S)(post|pre)$")>;
+def : InstRW<[FalkorWr_2XYZ_2ST_2VSD_0cyc], (instregex "^STRQro(W|X)$")>;
+
def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>;
def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
@@ -421,6 +425,7 @@ def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>;
def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>;
def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)Drrr$")>;
+
// FP Miscellaneous Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>;
@@ -433,7 +438,6 @@ def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i16|v1i32|v2i32|v
def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>;
-
// Load Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>;
@@ -461,6 +465,7 @@ def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post
def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>;
def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>;
+
// Miscellaneous Data-Processing Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>;
@@ -502,28 +507,30 @@ def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>;
def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>;
-def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>;
+def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>;
def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>;
-def : InstRW<[WriteST], (instregex "^LDC.*$")>;
-def : InstRW<[WriteST], (instregex "^STLR(B|H|W|X)$")>;
-def : InstRW<[WriteST], (instregex "^STXP(W|X)$")>;
-def : InstRW<[WriteST], (instregex "^STXR(B|H|W|X)$")>;
+def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>;
+def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STLR(B|H|W|X)$")>;
+def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXP(W|X)$")>;
+def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXR(B|H|W|X)$")>;
-def : InstRW<[WriteSTX], (instregex "^STLXP(W|X)$")>;
-def : InstRW<[WriteSTX], (instregex "^STLXR(B|H|W|X)$")>;
+def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>;
+def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>;
def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>;
// Store Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[WriteVST], (instregex "^STP(D|S)(i|post|pre)$")>;
-def : InstRW<[WriteST], (instregex "^STP(W|X)(i|post|pre)$")>;
+def : InstRW<[WriteST], (instregex "^STP(W|X)i$")>;
+def : InstRW<[WriteST, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>;
def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>;
def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>;
-def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)(post|pre|ui)$")>;
+def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)ui$")>;
+def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)(post|pre)$")>;
def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>;
def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>;
def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>;
-def : InstRW<[WriteVST, WriteVST], (instregex "^STPQ(i|post|pre)$")>;
+def : InstRW<[WriteVST, WriteVST], (instregex "^STPQi$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^STPQ(post|pre)$")>;
diff --git a/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td b/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
index 9cdb4be4246b..e64b2c441a19 100644
--- a/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
+++ b/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
@@ -28,7 +28,6 @@
//===----------------------------------------------------------------------===//
// Define 1 micro-op types
-
def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; }
def FalkorWr_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
def FalkorWr_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; }
@@ -175,18 +174,33 @@ def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
//===----------------------------------------------------------------------===//
// Define 3 micro-op types
+def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
+ FalkorUnitLD]> {
+ let Latency = 0;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
+ FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
let Latency = 3;
let NumMicroOps = 3;
}
+
def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
let Latency = 4;
let NumMicroOps = 3;
}
+
def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
let Latency = 5;
let NumMicroOps = 3;
}
+
def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
let Latency = 6;
let NumMicroOps = 3;
@@ -196,10 +210,12 @@ def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
let Latency = 4;
let NumMicroOps = 3;
}
+
def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
let Latency = 3;
let NumMicroOps = 3;
}
+
def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
FalkorUnitLD]> {
let Latency = 3;
@@ -259,6 +275,12 @@ def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
let NumMicroOps = 4;
}
+def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
+ FalkorUnitSD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
//===----------------------------------------------------------------------===//
// Define 5 micro-op types
@@ -289,6 +311,13 @@ def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
let NumMicroOps = 6;
}
+def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
+ FalkorUnitVSD, FalkorUnitXYZ,
+ FalkorUnitST, FalkorUnitVSD]> {
+ let Latency = 0;
+ let NumMicroOps = 6;
+}
+
//===----------------------------------------------------------------------===//
// Define 8 micro-op types
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index d7bbc2bcd22c..4dbcc9581a84 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -2473,16 +2473,14 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- auto DB = AArch64DB::lookupDBByName(Tok.getString());
- if (!DB) {
- TokError("invalid barrier option name");
- return MatchOperand_ParseFail;
- }
-
// The only valid named option for ISB is 'sy'
- if (Mnemonic == "isb" && DB->Encoding != AArch64DB::sy) {
+ auto DB = AArch64DB::lookupDBByName(Tok.getString());
+ if (Mnemonic == "isb" && (!DB || DB->Encoding != AArch64DB::sy)) {
TokError("'sy' or #imm operand expected");
return MatchOperand_ParseFail;
+ } else if (!DB) {
+ TokError("invalid barrier option name");
+ return MatchOperand_ParseFail;
}
Operands.push_back(AArch64Operand::CreateBarrier(
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 41ae70f85e58..fc89657bffd3 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -275,6 +276,12 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
}
}
+ if (Opcode == AArch64::CompilerBarrier) {
+ O << '\t' << MAI.getCommentString() << " COMPILER BARRIER";
+ printAnnotation(O, Annot);
+ return;
+ }
+
if (!printAliasInstr(MI, STI, O))
printInstruction(MI, STI, O);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 62dfa59483eb..33698d2b8c38 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -565,6 +565,9 @@ void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call);
Fixups.push_back(MCFixup::create(0, MI.getOperand(0).getExpr(), Fixup));
return;
+ } else if (MI.getOpcode() == AArch64::CompilerBarrier) {
+ // This only prevents the compiler from reordering accesses; it emits no code.
+ return;
}
uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 2c7a2d8962d0..0f331486d0f8 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -406,7 +406,8 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
- FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode
+ FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
+ FeatureFastFMAF32
]
>;
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 318de7f2e3d2..f5110857da84 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -116,8 +116,11 @@ private:
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
- bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &ImmOffset) const;
+ bool SelectMUBUFScratchOffen(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &ImmOffset) const;
+ bool SelectMUBUFScratchOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ SDValue &Offset) const;
+
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
SDValue &Offset, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
@@ -150,14 +153,12 @@ private:
bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
- bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Omod) const;
bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp,
SDValue &Omod) const;
@@ -953,8 +954,12 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
return true;
}
+static bool isLegalMUBUFImmOffset(unsigned Imm) {
+ return isUInt<12>(Imm);
+}
+
static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
- return isUInt<12>(Imm->getZExtValue());
+ return isLegalMUBUFImmOffset(Imm->getZExtValue());
}
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
@@ -1076,9 +1081,9 @@ SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
return N;
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
- SDValue &VAddr, SDValue &SOffset,
- SDValue &ImmOffset) const {
+bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &ImmOffset) const {
SDLoc DL(Addr);
MachineFunction &MF = CurDAG->getMachineFunction();
@@ -1087,8 +1092,22 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
- // (add n0, c1)
+ if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ unsigned Imm = CAddr->getZExtValue();
+ assert(!isLegalMUBUFImmOffset(Imm) &&
+ "should have been selected by other pattern");
+
+ SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
+ MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
+ DL, MVT::i32, HighBits);
+ VAddr = SDValue(MovHighBits, 0);
+ ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
+ return true;
+ }
+
if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ // (add n0, c1)
+
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
@@ -1107,6 +1126,24 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDValue Addr,
+ SDValue &SRsrc,
+ SDValue &SOffset,
+ SDValue &Offset) const {
+ ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
+ if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
+ return false;
+
+ SDLoc DL(Addr);
+ MachineFunction &MF = CurDAG->getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
+ SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
+ return true;
+}
+
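
The constant-address path in SelectMUBUFScratchOffen above splits an offset that does not fit the 12-bit MUBUF immediate into a register base (the high bits, materialized with v_mov_b32) plus a legal immediate remainder. A minimal sketch of that split, with an arbitrary example value; not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Imm = 0x12345;            // too large for a 12-bit offset
  uint32_t HighBits = Imm & ~4095u;  // goes into VAddr via the move
  uint32_t ImmOffset = Imm & 4095u;  // legal 12-bit immediate offset
  assert(HighBits + ImmOffset == Imm);
  assert(ImmOffset < 4096);
}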
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset,
SDValue &GLC, SDValue &SLC,
@@ -1628,38 +1665,20 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
return isNoNanSrc(Src);
}
-bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
- SDValue &SrcMods) const {
- bool Res = SelectVOP3Mods(In, Src, SrcMods);
- return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
+bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
+ if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
+ return false;
+
+ Src = In;
+ return true;
}
bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
SDValue &SrcMods, SDValue &Clamp,
SDValue &Omod) const {
SDLoc DL(In);
- // FIXME: Handle Clamp and Omod
- Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
-
- return SelectVOP3Mods(In, Src, SrcMods);
-}
-
-bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
- SDValue &SrcMods, SDValue &Clamp,
- SDValue &Omod) const {
- bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);
-
- return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
- cast<ConstantSDNode>(Clamp)->isNullValue() &&
- cast<ConstantSDNode>(Omod)->isNullValue();
-}
-
-bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
- SDValue &SrcMods,
- SDValue &Omod) const {
- // FIXME: Handle Omod
- Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
return SelectVOP3Mods(In, Src, SrcMods);
}
@@ -1677,9 +1696,8 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
Src = In;
SDLoc DL(In);
- // FIXME: Handle Clamp and Omod
- Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
- Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
return true;
}
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index c0f336e082bd..e21775e61dd4 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2315,12 +2315,13 @@ static bool simplifyI24(SDNode *Node24, unsigned OpIdx,
SelectionDAG &DAG = DCI.DAG;
SDValue Op = Node24->getOperand(OpIdx);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = Op.getValueType();
APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
- if (TLO.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI))
+ if (TLI.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI, TLO))
return true;
return false;
@@ -3361,7 +3362,7 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
+ if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) ||
TLI.SimplifyDemandedBits(BitsFrom, Demanded,
KnownZero, KnownOne, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);
@@ -3436,6 +3437,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ELSE)
NODE_NAME_CASE(LOOP)
NODE_NAME_CASE(CALL)
+ NODE_NAME_CASE(TRAP)
NODE_NAME_CASE(RET_FLAG)
NODE_NAME_CASE(RETURN_TO_EPILOG)
NODE_NAME_CASE(ENDPGM)
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index d6aa0ba92bf7..13cbfe267932 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -231,6 +231,10 @@ public:
AMDGPUAS getAMDGPUAS() const {
return AMDGPUASI;
}
+
+ MVT getFenceOperandTy(const DataLayout &DL) const override {
+ return MVT::i32;
+ }
};
namespace AMDGPUISD {
@@ -244,6 +248,7 @@ enum NodeType : unsigned {
// Function call.
CALL,
+ TRAP,
// Masked control flow nodes.
IF,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 56f060984f08..c1706d12a2ea 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -78,6 +78,11 @@ def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;
+def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP",
+ SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>,
+ [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue]
+>;
+
def AMDGPUconstdata_ptr : SDNode<
"AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,
SDTCisVT<0, iPTR>]>
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td
index b8d681298dee..4e688ab0b105 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -50,6 +50,16 @@ def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
+def u16ImmTarget : AsmOperandClass {
+ let Name = "U16Imm";
+ let RenderMethod = "addImmOperands";
+}
+
+def s16ImmTarget : AsmOperandClass {
+ let Name = "S16Imm";
+ let RenderMethod = "addImmOperands";
+}
+
let OperandType = "OPERAND_IMMEDIATE" in {
def u32imm : Operand<i32> {
@@ -58,6 +68,12 @@ def u32imm : Operand<i32> {
def u16imm : Operand<i16> {
let PrintMethod = "printU16ImmOperand";
+ let ParserMatchClass = u16ImmTarget;
+}
+
+def s16imm : Operand<i16> {
+ let PrintMethod = "printU16ImmOperand";
+ let ParserMatchClass = s16ImmTarget;
}
def u8imm : Operand<i8> {
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 14ee1c81f8fa..da247fea7de6 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -225,6 +225,12 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ if (MI->getOpcode() == AMDGPU::SI_MASKED_UNREACHABLE) {
+ if (isVerbose())
+ OutStreamer->emitRawComment(" divergent unreachable");
+ return;
+ }
+
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 961f7186f373..70c848f3c7bd 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -479,6 +479,8 @@ public:
bool isSMRDLiteralOffset() const;
bool isDPPCtrl() const;
bool isGPRIdxMode() const;
+ bool isS16Imm() const;
+ bool isU16Imm() const;
StringRef getExpressionAsToken() const {
assert(isExpr());
@@ -2836,6 +2838,28 @@ void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
// s_waitcnt
//===----------------------------------------------------------------------===//
+static bool
+encodeCnt(
+ const AMDGPU::IsaInfo::IsaVersion ISA,
+ int64_t &IntVal,
+ int64_t CntVal,
+ bool Saturate,
+ unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
+ unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
+{
+ bool Failed = false;
+
+ IntVal = encode(ISA, IntVal, CntVal);
+ if (CntVal != decode(ISA, IntVal)) {
+ if (Saturate) {
+ IntVal = encode(ISA, IntVal, -1);
+ } else {
+ Failed = true;
+ }
+ }
+ return Failed;
+}
+
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
StringRef CntName = Parser.getTok().getString();
int64_t CntVal;
@@ -2851,25 +2875,35 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
if (getParser().parseAbsoluteExpression(CntVal))
return true;
- if (getLexer().isNot(AsmToken::RParen))
- return true;
-
- Parser.Lex();
- if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
- Parser.Lex();
-
AMDGPU::IsaInfo::IsaVersion ISA =
AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
- if (CntName == "vmcnt")
- IntVal = encodeVmcnt(ISA, IntVal, CntVal);
- else if (CntName == "expcnt")
- IntVal = encodeExpcnt(ISA, IntVal, CntVal);
- else if (CntName == "lgkmcnt")
- IntVal = encodeLgkmcnt(ISA, IntVal, CntVal);
- else
- return true;
- return false;
+ bool Failed = true;
+ bool Sat = CntName.endswith("_sat");
+
+ if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
+ Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
+ } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
+ Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
+ } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
+ Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
+ }
+
+ // To improve diagnostics, do not skip delimiters on errors
+ if (!Failed) {
+ if (getLexer().isNot(AsmToken::RParen)) {
+ return true;
+ }
+ Parser.Lex();
+ if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
+ const AsmToken NextToken = getLexer().peekTok();
+ if (NextToken.is(AsmToken::Identifier)) {
+ Parser.Lex();
+ }
+ }
+ }
+
+ return Failed;
}
OperandMatchResultTy
@@ -3858,6 +3892,14 @@ bool AMDGPUOperand::isGPRIdxMode() const {
return isImm() && isUInt<4>(getImm());
}
+bool AMDGPUOperand::isS16Imm() const {
+ return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
+}
+
+bool AMDGPUOperand::isU16Imm() const {
+ return isImm() && isUInt<16>(getImm());
+}
+
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
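The new vmcnt_sat/expcnt_sat/lgkmcnt_sat forms hinge on encodeCnt's round-trip check: the requested count is encoded into the packed s_waitcnt immediate, decoded back, and a mismatch either fails the parse or, for the saturating spelling, clamps the field to its maximum. A minimal standalone sketch of that check under simplified assumptions (a single counter field of FieldBits bits; the real encode/decode helpers in AMDGPUBaseInfo handle the per-ISA field layout):

#include <cstdint>

// Hypothetical stand-in for the encodeCnt helper above: returns true on
// failure, mirroring the "encode, decode, compare" pattern in the patch.
static bool encodeCounter(int64_t &Packed, int64_t CntVal, bool Saturate,
                          unsigned FieldBits) {
  const int64_t Mask = (int64_t(1) << FieldBits) - 1;
  Packed = (Packed & ~Mask) | (CntVal & Mask);   // encode into the low field
  if ((Packed & Mask) != CntVal) {               // decode and compare
    if (!Saturate)
      return true;                               // plain vmcnt(...): error out
    Packed |= Mask;                              // vmcnt_sat(...): clamp to max
  }
  return false;
}

With a hypothetical 4-bit field, vmcnt(20) would be rejected, while vmcnt_sat(20) silently becomes vmcnt(15).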
diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td
index a6609f0725ab..89eddb9ce961 100644
--- a/lib/Target/AMDGPU/BUFInstructions.td
+++ b/lib/Target/AMDGPU/BUFInstructions.td
@@ -11,7 +11,9 @@ def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
-def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
+def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen">;
+def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [], 20>;
+
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
@@ -958,21 +960,30 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, mubuf_az_extloadi8>;
} // End Predicates = [Has16BitInsts]
-class MUBUFScratchLoadPat <MUBUF_Pseudo Instr, ValueType vt, PatFrag ld> : Pat <
- (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
- i32:$soffset, u16imm:$offset))),
- (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
->;
+multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
+ MUBUF_Pseudo InstrOffset,
+ ValueType vt, PatFrag ld> {
+ def : Pat <
+ (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
+ i32:$soffset, u16imm:$offset))),
+ (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+ >;
+
+ def : Pat <
+ (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
+ (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
+ >;
+}
-def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i16, sextloadi8_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i16, extloadi8_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i32, sextloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i32, extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, BUFFER_LOAD_UBYTE_OFFSET, i16, extloadi8_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, BUFFER_LOAD_SSHORT_OFFSET, i32, sextloadi16_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, BUFFER_LOAD_USHORT_OFFSET, i32, extloadi16_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, BUFFER_LOAD_DWORD_OFFSET, i32, load_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, BUFFER_LOAD_DWORDX2_OFFSET, v2i32, load_private>;
+defm : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, BUFFER_LOAD_DWORDX4_OFFSET, v4i32, load_private>;
// BUFFER_LOAD_DWORD*, addr64=0
multiclass MUBUF_Load_Dword <ValueType vt,
@@ -1054,19 +1065,29 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_global>;
defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, global_store>;
-class MUBUFScratchStorePat <MUBUF_Pseudo Instr, ValueType vt, PatFrag st> : Pat <
- (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
- u16imm:$offset)),
- (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
->;
+multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
+ MUBUF_Pseudo InstrOffset,
+ ValueType vt, PatFrag st> {
+ def : Pat <
+ (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
+ i32:$soffset, u16imm:$offset)),
+ (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+ >;
+
+ def : Pat <
+ (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
+ u16imm:$offset)),
+ (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0)
+ >;
+}
-def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i16, truncstorei8_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i16, store_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i32, truncstorei8_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i32, truncstorei16_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, BUFFER_STORE_SHORT_OFFSET, i16, store_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, BUFFER_STORE_DWORD_OFFSET, i32, store_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, BUFFER_STORE_DWORDX2_OFFSET, v2i32, store_private>;
+defm : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, BUFFER_STORE_DWORDX4_OFFSET, v4i32, store_private>;
//===----------------------------------------------------------------------===//
// MTBUF Patterns
diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp
index 4ecfa118fb27..bf16a8216001 100644
--- a/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -83,8 +83,8 @@ unsigned GCNRegPressure::getRegKind(unsigned Reg,
const auto RC = MRI.getRegClass(Reg);
auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
return STI->isSGPRClass(RC) ?
- (RC->getSize() == 4 ? SGPR32 : SGPR_TUPLE) :
- (RC->getSize() == 4 ? VGPR32 : VGPR_TUPLE);
+ (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) :
+ (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
}
void GCNRegPressure::inc(unsigned Reg,
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
index 29a6ab9fbe93..647017d5061d 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
@@ -286,20 +286,20 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
return ValueKind::Pipe;
return StringSwitch<ValueKind>(BaseTypeName)
+ .Case("image1d_t", ValueKind::Image)
+ .Case("image1d_array_t", ValueKind::Image)
+ .Case("image1d_buffer_t", ValueKind::Image)
+ .Case("image2d_t", ValueKind::Image)
+ .Case("image2d_array_t", ValueKind::Image)
+ .Case("image2d_array_depth_t", ValueKind::Image)
+ .Case("image2d_array_msaa_t", ValueKind::Image)
+ .Case("image2d_array_msaa_depth_t", ValueKind::Image)
+ .Case("image2d_depth_t", ValueKind::Image)
+ .Case("image2d_msaa_t", ValueKind::Image)
+ .Case("image2d_msaa_depth_t", ValueKind::Image)
+ .Case("image3d_t", ValueKind::Image)
.Case("sampler_t", ValueKind::Sampler)
.Case("queue_t", ValueKind::Queue)
- .Cases("image1d_t",
- "image1d_array_t",
- "image1d_buffer_t",
- "image2d_t" ,
- "image2d_array_t",
- "image2d_array_depth_t",
- "image2d_array_msaa_t"
- "image2d_array_msaa_depth_t"
- "image2d_depth_t",
- "image2d_msaa_t",
- "image2d_msaa_depth_t",
- "image3d_t", ValueKind::Image)
.Default(isa<PointerType>(Ty) ?
(Ty->getPointerAddressSpace() ==
AMDGPUASI.LOCAL_ADDRESS ?
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 6c61fb1f2d6b..2364e7b7b5fb 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -15,6 +15,7 @@ using namespace llvm;
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
CodePointerSize = (TT.getArch() == Triple::amdgcn) ? 8 : 4;
+ StackGrowsUp = true;
HasSingleParameterDotFile = false;
//===------------------------------------------------------------------===//
MinInstAlignment = 4;
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index f9d258f44a62..b0f0bf04a891 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -81,6 +81,11 @@ using namespace llvm;
#define DEBUG_TYPE "si-fix-sgpr-copies"
+static cl::opt<bool> EnableM0Merge(
+ "amdgpu-enable-merge-m0",
+ cl::desc("Merge and hoist M0 initializations"),
+ cl::init(false));
+
namespace {
class SIFixSGPRCopies : public MachineFunctionPass {
@@ -108,7 +113,7 @@ public:
INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
"SI Fix SGPR copies", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
"SI Fix SGPR copies", false, false)
@@ -332,27 +337,186 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
return true;
}
-static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
- const TargetRegisterInfo *TRI) {
- DenseSet<MachineBasicBlock*> Visited;
+template <class UnaryPredicate>
+bool searchPredecessors(const MachineBasicBlock *MBB,
+ const MachineBasicBlock *CutOff,
+ UnaryPredicate Predicate) {
+
+ if (MBB == CutOff)
+ return false;
+
+ DenseSet<const MachineBasicBlock*> Visited;
SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(),
MBB->pred_end());
while (!Worklist.empty()) {
- MachineBasicBlock *mbb = Worklist.back();
- Worklist.pop_back();
+ MachineBasicBlock *MBB = Worklist.pop_back_val();
- if (!Visited.insert(mbb).second)
+ if (!Visited.insert(MBB).second)
continue;
- if (hasTerminatorThatModifiesExec(*mbb, *TRI))
+ if (MBB == CutOff)
+ continue;
+ if (Predicate(MBB))
return true;
- Worklist.insert(Worklist.end(), mbb->pred_begin(), mbb->pred_end());
+ Worklist.append(MBB->pred_begin(), MBB->pred_end());
}
return false;
}
+static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
+ const TargetRegisterInfo *TRI) {
+ return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
+ return hasTerminatorThatModifiesExec(*MBB, *TRI); });
+}
+
+// Checks whether there is a potential path from instruction From to
+// instruction To. If CutOff is specified and lies on that path, the portion
+// of the path above it is ignored and the path is reported as not reachable.
+static bool isReachable(const MachineInstr *From,
+ const MachineInstr *To,
+ const MachineBasicBlock *CutOff,
+ MachineDominatorTree &MDT) {
+ // If either From block dominates To block or instructions are in the same
+ // block and From is higher.
+ if (MDT.dominates(From, To))
+ return true;
+
+ const MachineBasicBlock *MBBFrom = From->getParent();
+ const MachineBasicBlock *MBBTo = To->getParent();
+ if (MBBFrom == MBBTo)
+ return false;
+
+ // Instructions are in different blocks, do predecessor search.
+ // We should almost never get here since we do not usually produce M0 stores
+ // other than -1.
+ return searchPredecessors(MBBTo, CutOff, [MBBFrom]
+ (const MachineBasicBlock *MBB) { return MBB == MBBFrom; });
+}
+
+// Hoist and merge identical SGPR initializations into a common predecessor.
+// This is intended to combine M0 initializations, but can work with any
+// SGPR. A VGPR cannot be processed since we cannot guarantee vector
+// execution.
+static bool hoistAndMergeSGPRInits(unsigned Reg,
+ const MachineRegisterInfo &MRI,
+ MachineDominatorTree &MDT) {
+ // List of inits by immediate value.
+ typedef std::map<unsigned, std::list<MachineInstr*>> InitListMap;
+ InitListMap Inits;
+ // List of clobbering instructions.
+ SmallVector<MachineInstr*, 8> Clobbers;
+ bool Changed = false;
+
+ for (auto &MI : MRI.def_instructions(Reg)) {
+ MachineOperand *Imm = nullptr;
+ for (auto &MO: MI.operands()) {
+ if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
+ (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
+ Imm = nullptr;
+ break;
+ } else if (MO.isImm())
+ Imm = &MO;
+ }
+ if (Imm)
+ Inits[Imm->getImm()].push_front(&MI);
+ else
+ Clobbers.push_back(&MI);
+ }
+
+ for (auto &Init : Inits) {
+ auto &Defs = Init.second;
+
+ for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) {
+ MachineInstr *MI1 = *I1;
+
+ for (auto I2 = std::next(I1); I2 != E; ) {
+ MachineInstr *MI2 = *I2;
+
+ // Check any possible interference
+ auto intereferes = [&](MachineBasicBlock::iterator From,
+ MachineBasicBlock::iterator To) -> bool {
+
+ assert(MDT.dominates(&*To, &*From));
+
+ auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool {
+ const MachineBasicBlock *MBBFrom = From->getParent();
+ const MachineBasicBlock *MBBTo = To->getParent();
+ bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT);
+ bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT);
+ if (!MayClobberFrom && !MayClobberTo)
+ return false;
+ if ((MayClobberFrom && !MayClobberTo) ||
+ (!MayClobberFrom && MayClobberTo))
+ return true;
+ // Clobber may reach both. This is not an interference only if both are
+ // dominated by Clobber and belong to the same block, or if Clobber
+ // properly dominates To; since To >> From, Clobber then dominates
+ // both from a common dominator block.
+ return !((MBBFrom == MBBTo &&
+ MDT.dominates(Clobber, &*From) &&
+ MDT.dominates(Clobber, &*To)) ||
+ MDT.properlyDominates(Clobber->getParent(), MBBTo));
+ };
+
+ return (any_of(Clobbers, interferes)) ||
+ (any_of(Inits, [&](InitListMap::value_type &C) {
+ return C.first != Init.first && any_of(C.second, interferes);
+ }));
+ };
+
+ if (MDT.dominates(MI1, MI2)) {
+ if (!intereferes(MI2, MI1)) {
+ DEBUG(dbgs() << "Erasing from BB#" << MI2->getParent()->getNumber()
+ << " " << *MI2);
+ MI2->eraseFromParent();
+ Defs.erase(I2++);
+ Changed = true;
+ continue;
+ }
+ } else if (MDT.dominates(MI2, MI1)) {
+ if (!intereferes(MI1, MI2)) {
+ DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber()
+ << " " << *MI1);
+ MI1->eraseFromParent();
+ Defs.erase(I1++);
+ Changed = true;
+ break;
+ }
+ } else {
+ auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(),
+ MI2->getParent());
+ if (!MBB) {
+ ++I2;
+ continue;
+ }
+
+ MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
+ if (!intereferes(MI1, I) && !intereferes(MI2, I)) {
+ DEBUG(dbgs() << "Erasing from BB#" << MI1->getParent()->getNumber()
+ << " " << *MI1 << "and moving from BB#"
+ << MI2->getParent()->getNumber() << " to BB#"
+ << I->getParent()->getNumber() << " " << *MI2);
+ I->getParent()->splice(I, MI2->getParent(), MI2);
+ MI1->eraseFromParent();
+ Defs.erase(I1++);
+ Changed = true;
+ break;
+ }
+ }
+ ++I2;
+ }
+ ++I1;
+ }
+ }
+
+ if (Changed)
+ MRI.clearKillFlags(Reg);
+
+ return Changed;
+}
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -485,5 +649,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
}
+ if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
+ hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT);
+
return true;
}
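Both predsHasDivergentTerminator and isReachable above reduce to the same predecessor walk: visit every transitive predecessor of a block, never walking through CutOff, and ask whether any of them satisfies a predicate. A minimal sketch of that walk over a plain integer predecessor map (illustrative types only; the real code iterates MachineBasicBlock predecessor lists, and the whole M0 merge only fires when -amdgpu-enable-merge-m0 is passed and the opt level is above -O0):

#include <set>
#include <vector>

// Preds[v] lists the predecessors of block v. Returns true if any transitive
// predecessor of Start (stopping at CutOff, -1 for none) satisfies Pred.
template <class UnaryPredicate>
static bool searchPreds(const std::vector<std::vector<int>> &Preds, int Start,
                        int CutOff, UnaryPredicate Pred) {
  if (Start == CutOff)
    return false;
  std::set<int> Visited;
  std::vector<int> Worklist(Preds[Start].begin(), Preds[Start].end());
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(BB).second || BB == CutOff)
      continue;
    if (Pred(BB))
      return true;
    Worklist.insert(Worklist.end(), Preds[BB].begin(), Preds[BB].end());
  }
  return false;
}

isReachable then amounts to asking whether From's block shows up among To's transitive predecessors, bounded by the CutOff block passed in by the caller.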
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index abe6af9a6d3f..86e3b37b09e9 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -101,10 +101,12 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
const SIRegisterInfo *TRI,
SIMachineFunctionInfo *MFI,
MachineFunction &MF) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
// We need to insert initialization of the scratch resource descriptor.
unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
- if (ScratchRsrcReg == AMDGPU::NoRegister)
+ if (ScratchRsrcReg == AMDGPU::NoRegister ||
+ !MRI.isPhysRegUsed(ScratchRsrcReg))
return AMDGPU::NoRegister;
if (ST.hasSGPRInitBug() ||
@@ -122,8 +124,6 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
// We find the resource first because it has an alignment requirement.
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
@@ -143,24 +143,34 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
return ScratchRsrcReg;
}
-unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
+// Shift down registers reserved for the scratch wave offset and stack pointer
+// SGPRs.
+std::pair<unsigned, unsigned>
+SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
const SISubtarget &ST,
const SIInstrInfo *TII,
const SIRegisterInfo *TRI,
SIMachineFunctionInfo *MFI,
MachineFunction &MF) const {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
- if (ST.hasSGPRInitBug() ||
- ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF))
- return ScratchWaveOffsetReg;
- unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
- MachineRegisterInfo &MRI = MF.getRegInfo();
+ // No replacement necessary.
+ if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
+ !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
+ assert(MFI->getStackPtrOffsetReg() == AMDGPU::NoRegister);
+ return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
+ }
+
+ unsigned SPReg = MFI->getStackPtrOffsetReg();
+ if (ST.hasSGPRInitBug())
+ return std::make_pair(ScratchWaveOffsetReg, SPReg);
+
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
if (NumPreloaded > AllSGPRs.size())
- return ScratchWaveOffsetReg;
+ return std::make_pair(ScratchWaveOffsetReg, SPReg);
AllSGPRs = AllSGPRs.slice(NumPreloaded);
@@ -175,26 +185,42 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
// register from the list to consider, it means that when this
// register is being used for the scratch wave offset and there
// are no other free SGPRs, then the value will stay in this register.
+ // + 1 if stack pointer is used.
// ----
- // 13
- if (AllSGPRs.size() < 13)
- return ScratchWaveOffsetReg;
+ // 13 (+1)
+ unsigned ReservedRegCount = 13;
+ if (SPReg != AMDGPU::NoRegister)
+ ++ReservedRegCount;
- for (MCPhysReg Reg : AllSGPRs.drop_back(13)) {
+ if (AllSGPRs.size() < ReservedRegCount)
+ return std::make_pair(ScratchWaveOffsetReg, SPReg);
+
+ bool HandledScratchWaveOffsetReg =
+ ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+
+ for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) {
// Pick the first unallocated SGPR. Be careful not to pick an alias of the
// scratch descriptor, since we haven't added its uses yet.
- if (!MRI.isPhysRegUsed(Reg)) {
- if (!MRI.isAllocatable(Reg) ||
- TRI->isSubRegisterEq(ScratchRsrcReg, Reg))
- continue;
+ if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
+ if (!HandledScratchWaveOffsetReg) {
+ HandledScratchWaveOffsetReg = true;
- MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
- MFI->setScratchWaveOffsetReg(Reg);
- return Reg;
+ MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+ MFI->setScratchWaveOffsetReg(Reg);
+ ScratchWaveOffsetReg = Reg;
+ } else {
+ if (SPReg == AMDGPU::NoRegister)
+ break;
+
+ MRI.replaceRegWith(SPReg, Reg);
+ MFI->setStackPtrOffsetReg(Reg);
+ SPReg = Reg;
+ break;
+ }
}
}
- return ScratchWaveOffsetReg;
+ return std::make_pair(ScratchWaveOffsetReg, SPReg);
}
void SIFrameLowering::emitPrologue(MachineFunction &MF,
@@ -220,18 +246,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned ScratchRsrcReg
- = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
- unsigned ScratchWaveOffsetReg
- = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
-
- if (ScratchRsrcReg == AMDGPU::NoRegister) {
- assert(ScratchWaveOffsetReg == AMDGPU::NoRegister);
- return;
- }
-
- assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));
-
// We need to do the replacement of the private segment buffer and wave offset
// register even if there are no stack objects. There could be stores to undef
// or a constant without an associated object.
@@ -244,19 +258,49 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
emitFlatScratchInit(ST, MF, MBB);
+ unsigned SPReg = MFI->getStackPtrOffsetReg();
+ if (SPReg != AMDGPU::NoRegister) {
+ DebugLoc DL;
+ int64_t StackSize = MF.getFrameInfo().getStackSize();
+
+ if (StackSize == 0) {
+ BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
+ .addReg(MFI->getScratchWaveOffsetReg());
+ } else {
+ BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
+ .addReg(MFI->getScratchWaveOffsetReg())
+ .addImm(StackSize * ST.getWavefrontSize());
+ }
+ }
+
+ unsigned ScratchRsrcReg
+ = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
+
+ unsigned ScratchWaveOffsetReg;
+ std::tie(ScratchWaveOffsetReg, SPReg)
+ = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
+
+ // It's possible to have uses of only ScratchWaveOffsetReg without
+ // ScratchRsrcReg if it's only used for the initialization of flat_scratch,
+ // but the inverse is not true.
+ if (ScratchWaveOffsetReg == AMDGPU::NoRegister) {
+ assert(ScratchRsrcReg == AMDGPU::NoRegister);
+ return;
+ }
+
// We need to insert initialization of the scratch resource descriptor.
unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
-
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
PreloadedPrivateBufferReg = TRI->getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
}
- bool OffsetRegUsed = !MRI.use_empty(ScratchWaveOffsetReg);
- bool ResourceRegUsed = !MRI.use_empty(ScratchRsrcReg);
+ bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg);
+ bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
+ MRI.isPhysRegUsed(ScratchRsrcReg);
// We added live-ins during argument lowering, but since they were not used
// they were deleted. We're adding the uses now, so add them back.
@@ -469,7 +513,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
// this also ensures we shouldn't need a register for the offset when
// emergency scavenging.
int ScavengeFI = MFI.CreateFixedObject(
- AMDGPU::SGPR_32RegClass.getSize(), 0, false);
+ TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
RS->addScavengingFrameIndex(ScavengeFI);
}
}
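The new prologue code materializes the stack pointer from the scratch wave offset: a zero-sized frame is a plain COPY, otherwise S_ADD_U32 adds the frame size scaled by the wavefront size (the code multiplies StackSize by ST.getWavefrontSize(), consistent with scratch being allocated per lane, so the wave-level offset advances by the per-lane frame size times the number of lanes). A small arithmetic restatement of that step, with illustrative values:

#include <cstdint>

// Mirrors the COPY / S_ADD_U32 choice in emitPrologue above.
static uint64_t initialSP(uint64_t ScratchWaveOffset,
                          uint64_t StackSizePerLane, unsigned WavefrontSize) {
  if (StackSizePerLane == 0)
    return ScratchWaveOffset;                                  // COPY
  return ScratchWaveOffset + StackSizePerLane * WavefrontSize; // S_ADD_U32
}

For example, a 16-byte frame on a 64-lane wave advances SP by 16 * 64 = 1024 bytes past the scratch wave offset.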
diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h
index 1bfc08093da2..7ccd02b3c86a 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/lib/Target/AMDGPU/SIFrameLowering.h
@@ -49,7 +49,7 @@ private:
SIMachineFunctionInfo *MFI,
MachineFunction &MF) const;
- unsigned getReservedPrivateSegmentWaveByteOffsetReg(
+ std::pair<unsigned, unsigned> getReservedPrivateSegmentWaveByteOffsetReg(
const SISubtarget &ST,
const SIInstrInfo *TII,
const SIRegisterInfo *TRI,
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index dd867b15b4c7..ce74a7cd8b04 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -287,8 +287,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// On SI this is s_memtime and s_memrealtime on VI.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
- setOperationAction(ISD::TRAP, MVT::Other, Legal);
- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
+ setOperationAction(ISD::TRAP, MVT::Other, Custom);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -1644,7 +1644,7 @@ computeIndirectRegAndOffset(const SIRegisterInfo &TRI,
const TargetRegisterClass *SuperRC,
unsigned VecReg,
int Offset) {
- int NumElts = SuperRC->getSize() / 4;
+ int NumElts = TRI.getRegSizeInBits(*SuperRC) / 32;
// Skip out of bounds offsets, or else we would end up using an undefined
// register.
@@ -1793,17 +1793,18 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
return LoopBB;
}
-static unsigned getMOVRELDPseudo(const TargetRegisterClass *VecRC) {
- switch (VecRC->getSize()) {
- case 4:
+static unsigned getMOVRELDPseudo(const SIRegisterInfo &TRI,
+ const TargetRegisterClass *VecRC) {
+ switch (TRI.getRegSizeInBits(*VecRC)) {
+ case 32: // 4 bytes
return AMDGPU::V_MOVRELD_B32_V1;
- case 8:
+ case 64: // 8 bytes
return AMDGPU::V_MOVRELD_B32_V2;
- case 16:
+ case 128: // 16 bytes
return AMDGPU::V_MOVRELD_B32_V4;
- case 32:
+ case 256: // 32 bytes
return AMDGPU::V_MOVRELD_B32_V8;
- case 64:
+ case 512: // 64 bytes
return AMDGPU::V_MOVRELD_B32_V16;
default:
llvm_unreachable("unsupported size for MOVRELD pseudos");
@@ -1863,7 +1864,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
} else {
- const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC));
+ const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC));
BuildMI(MBB, I, DL, MovRelDesc)
.addReg(Dst, RegState::Define)
@@ -1907,7 +1908,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
.addReg(PhiReg, RegState::Implicit)
.addReg(AMDGPU::M0, RegState::Implicit);
} else {
- const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC));
+ const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(TRI, VecRC));
BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
.addReg(Dst, RegState::Define)
@@ -1948,50 +1949,6 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
switch (MI.getOpcode()) {
- case AMDGPU::S_TRAP_PSEUDO: {
- const DebugLoc &DL = MI.getDebugLoc();
- const int TrapType = MI.getOperand(0).getImm();
-
- if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa &&
- Subtarget->isTrapHandlerEnabled()) {
-
- MachineFunction *MF = BB->getParent();
- SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
- unsigned UserSGPR = Info->getQueuePtrUserSGPR();
- assert(UserSGPR != AMDGPU::NoRegister);
-
- if (!BB->isLiveIn(UserSGPR))
- BB->addLiveIn(UserSGPR);
-
- BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
- .addReg(UserSGPR);
- BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP))
- .addImm(TrapType)
- .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
- } else {
- switch (TrapType) {
- case SISubtarget::TrapIDLLVMTrap:
- BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM));
- break;
- case SISubtarget::TrapIDLLVMDebugTrap: {
- DiagnosticInfoUnsupported NoTrap(*MF->getFunction(),
- "debugtrap handler not supported",
- DL,
- DS_Warning);
- LLVMContext &C = MF->getFunction()->getContext();
- C.diagnose(NoTrap);
- BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_NOP))
- .addImm(0);
- break;
- }
- default:
- llvm_unreachable("unsupported trap handler type!");
- }
- }
-
- MI.eraseFromParent();
- return BB;
- }
case AMDGPU::SI_INIT_M0:
BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
@@ -2163,6 +2120,10 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::FP_ROUND:
return lowerFP_ROUND(Op, DAG);
+
+ case ISD::TRAP:
+ case ISD::DEBUGTRAP:
+ return lowerTRAP(Op, DAG);
}
return SDValue();
}
@@ -2431,6 +2392,57 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);;
}
+SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDValue Chain = Op.getOperand(0);
+
+ unsigned TrapID = Op.getOpcode() == ISD::DEBUGTRAP ?
+ SISubtarget::TrapIDLLVMDebugTrap : SISubtarget::TrapIDLLVMTrap;
+
+ if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa &&
+ Subtarget->isTrapHandlerEnabled()) {
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ unsigned UserSGPR = Info->getQueuePtrUserSGPR();
+ assert(UserSGPR != AMDGPU::NoRegister);
+
+ SDValue QueuePtr = CreateLiveInRegister(
+ DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+
+ SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
+
+ SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01,
+ QueuePtr, SDValue());
+
+ SDValue Ops[] = {
+ ToReg,
+ DAG.getTargetConstant(TrapID, SL, MVT::i16),
+ SGPR01,
+ ToReg.getValue(1)
+ };
+
+ return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
+ }
+
+ switch (TrapID) {
+ case SISubtarget::TrapIDLLVMTrap:
+ return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
+ case SISubtarget::TrapIDLLVMDebugTrap: {
+ DiagnosticInfoUnsupported NoTrap(*MF.getFunction(),
+ "debugtrap handler not supported",
+ Op.getDebugLoc(),
+ DS_Warning);
+ LLVMContext &Ctx = MF.getFunction()->getContext();
+ Ctx.diagnose(NoTrap);
+ return Chain;
+ }
+ default:
+ llvm_unreachable("unsupported trap handler type!");
+ }
+
+ return Chain;
+}
+
SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const {
// FIXME: Use inline constants (src_{shared, private}_base) instead.
@@ -3410,9 +3422,11 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
EVT VT = Op.getValueType();
bool Unsafe = DAG.getTarget().Options.UnsafeFPMath;
+ if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals())
+ return SDValue();
+
if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
- if (Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
- VT == MVT::f16) {
+ if (Unsafe || VT == MVT::f32 || VT == MVT::f16) {
if (CLHS->isExactlyValue(1.0)) {
// v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
// the CI documentation has a worst case error of 1 ulp.
@@ -4696,7 +4710,7 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLO.ShrinkDemandedConstant(Src, Demanded) ||
+ if (TLI.ShrinkDemandedConstant(Src, Demanded, TLO) ||
TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);
}
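One behavioural subtlety in lowerFastUnsafeFDIV above: with f32 denormals enabled and no unsafe-math, the combine now gives up before even looking at a constant numerator, whereas the 1.0/x style folds still apply for f16 and for f32 without denormals. A condensed sketch of that gating (a restatement of the control flow shown in the hunk, not the full lowering):

// Returns false when the fast path must not be used at all; when it returns
// true, constant numerators such as 1.0 or -1.0 may fold to v_rcp / -v_rcp.
static bool fastFDivAllowed(bool UnsafeFPMath, bool IsF32, bool IsF16,
                            bool HasFP32Denormals) {
  if (!UnsafeFPMath && IsF32 && HasFP32Denormals)
    return false;                         // the new early return SDValue()
  return UnsafeFPMath || IsF32 || IsF16;  // the existing CLHS checks
}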
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c2a3e62aa827..9122cd72d323 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -428,8 +428,8 @@ RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI,
const MachineInstr &MIA = *MI;
const TargetRegisterClass *RC = TII->getOpRegClass(MIA, OpNo);
- unsigned Size = RC->getSize();
- Result.second = Result.first + (Size / 4);
+ unsigned Size = TRI->getRegSizeInBits(*RC);
+ Result.second = Result.first + (Size / 32);
return Result;
}
diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp
index 47257ce16ceb..9f32ecfa52ff 100644
--- a/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -216,8 +216,8 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
// XXX - What if this is a write into a super register?
const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0);
- unsigned Size = RC->getSize();
- Result.Named.LGKM = Size > 4 ? 2 : 1;
+ unsigned Size = TRI->getRegSizeInBits(*RC);
+ Result.Named.LGKM = Size > 32 ? 2 : 1;
} else {
// s_dcache_inv etc. do not have a destination register. Assume we
// want a wait on these.
@@ -289,12 +289,12 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
const MachineOperand &Reg) const {
- unsigned Size = RC->getSize();
- assert(Size >= 4);
+ unsigned Size = TRI->getRegSizeInBits(*RC);
+ assert(Size >= 32);
RegInterval Result;
Result.first = TRI->getEncodingValue(Reg.getReg());
- Result.second = Result.first + Size / 4;
+ Result.second = Result.first + Size / 32;
return Result;
}
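The waitcnt bookkeeping here illustrates the byte-to-bit migration used throughout this patch: register intervals now span one encoding slot per 32 bits rather than per 4 bytes, and the LGKM count test compares against 32 bits instead of 4 bytes. A trivial restatement of those two formulas:

// One encoding slot per 32-bit subregister, as in getRegInterval above.
static unsigned numSlots(unsigned RegSizeInBits) { return RegSizeInBits / 32; }

// LGKM wait count selection, as in getHwCounts above.
static unsigned lgkmCount(unsigned RegSizeInBits) {
  return RegSizeInBits > 32 ? 2 : 1;
}

// Example: a 32-bit SGPR destination occupies 1 slot with LGKM = 1, while a
// 128-bit SReg_128 destination occupies 4 slots with LGKM = 2.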
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 05ac67d26620..92e452a3d6a0 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -138,6 +138,11 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
}
if (isSMRD(Opc0) && isSMRD(Opc1)) {
+ // Skip time and cache invalidation instructions.
+ if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 ||
+ AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1)
+ return false;
+
assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
// Check base reg.
@@ -245,11 +250,11 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
unsigned EltSize;
if (LdSt.mayLoad())
- EltSize = getOpRegClass(LdSt, 0)->getSize() / 2;
+ EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, 0)) / 16;
else {
assert(LdSt.mayStore());
int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
- EltSize = getOpRegClass(LdSt, Data0Idx)->getSize();
+ EltSize = TRI->getRegSizeInBits(*getOpRegClass(LdSt, Data0Idx)) / 8;
}
if (isStride64(Opc))
@@ -345,7 +350,7 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
FirstLdSt.getParent()->getParent()->getRegInfo();
const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg());
- return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold;
+ return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
}
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
@@ -433,7 +438,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned EltSize = 4;
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
if (RI.isSGPRClass(RC)) {
- if (RC->getSize() > 4) {
+ if (RI.getRegSizeInBits(*RC) > 32) {
Opcode = AMDGPU::S_MOV_B64;
EltSize = 8;
} else {
@@ -493,11 +498,11 @@ int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
- if (DstRC->getSize() == 4) {
+ if (RI.getRegSizeInBits(*DstRC) == 32) {
return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
- } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) {
+ } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
return AMDGPU::S_MOV_B64;
- } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) {
+ } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
return AMDGPU::V_MOV_B64_PSEUDO;
}
return AMDGPU::COPY;
@@ -557,17 +562,18 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineMemOperand *MMO
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
Size, Align);
+ unsigned SpillSize = TRI->getSpillSize(*RC);
if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
// We are only allowed to create one new instruction when spilling
// registers, so we need to use pseudo instruction for spilling SGPRs.
- const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(RC->getSize()));
+ const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(SpillSize));
// The SGPR spill/restore instructions only work on number sgprs, so we need
// to make sure we are using the correct register class.
- if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) {
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg) && SpillSize == 4) {
MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
}
@@ -602,7 +608,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
- unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize());
+ unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
MFI->setHasSpilledVGPRs();
BuildMI(MBB, MI, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(isKill)) // data
@@ -660,6 +666,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
DebugLoc DL = MBB.findDebugLoc(MI);
unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
unsigned Size = FrameInfo.getObjectSize(FrameIndex);
+ unsigned SpillSize = TRI->getSpillSize(*RC);
MachinePointerInfo PtrInfo
= MachinePointerInfo::getFixedStack(*MF, FrameIndex);
@@ -670,8 +677,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (RI.isSGPRClass(RC)) {
// FIXME: Maybe this should not include a memoperand because it will be
// lowered to non-memory instructions.
- const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(RC->getSize()));
- if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) {
+ const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(SpillSize));
+ if (TargetRegisterInfo::isVirtualRegister(DestReg) && SpillSize == 4) {
MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
}
@@ -701,7 +708,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
- unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
+ unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
@@ -1440,9 +1447,9 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
- unsigned DstSize = DstRC->getSize();
+ unsigned DstSize = RI.getRegSizeInBits(*DstRC);
- if (DstSize == 4) {
+ if (DstSize == 32) {
unsigned SelOp = Pred == SCC_TRUE ?
AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
@@ -1456,7 +1463,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
return;
}
- if (DstSize == 8 && Pred == SCC_TRUE) {
+ if (DstSize == 64 && Pred == SCC_TRUE) {
MachineInstr *Select =
BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
.addReg(FalseReg)
@@ -1483,7 +1490,7 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass;
const int16_t *SubIndices = Sub0_15;
- int NElts = DstSize / 4;
+ int NElts = DstSize / 32;
// 64-bit select is only available for SALU.
if (Pred == SCC_TRUE) {
@@ -2635,6 +2642,19 @@ void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
return;
+ // Special case: V_READLANE_B32 accepts only immediate or SGPR operands for
+ // lane select. Fix up using V_READFIRSTLANE, since we assume that the lane
+ // select is uniform.
+ if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
+ RI.isVGPR(MRI, Src1.getReg())) {
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ const DebugLoc &DL = MI.getDebugLoc();
+ BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+ .add(Src1);
+ Src1.ChangeToRegister(Reg, false);
+ return;
+ }
+
// We do not use commuteInstruction here because it is too aggressive and will
// commute if it is possible. We only want to commute here if it improves
// legality. This can be called a fairly large number of times so don't waste
@@ -2729,7 +2749,7 @@ unsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI,
const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg);
const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC);
unsigned DstReg = MRI.createVirtualRegister(SRC);
- unsigned SubRegs = VRC->getSize() / 4;
+ unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
SmallVector<unsigned, 8> SRegs;
for (unsigned i = 0; i < SubRegs; ++i) {
@@ -3595,7 +3615,7 @@ void SIInstrInfo::movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
.addImm(16)
.add(Src0);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
- .addImm(0xffff);
+ .addImm(0xffff0000);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
.add(Src1)
.addReg(ImmReg, RegState::Kill)
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 659473ca6a47..03a5ef74b179 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -626,13 +626,13 @@ public:
return 4;
}
- return RI.getRegClass(OpInfo.RegClass)->getSize();
+ return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
}
/// \brief This form should usually be preferred since it handles operands
/// with unknown register classes.
unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
- return getOpRegClass(MI, OpNo)->getSize();
+ return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
}
/// \returns true if it is legal for the operand at index \p OpNo
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index c6daf743f3ac..7b052844f177 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -646,11 +646,10 @@ def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
-def VOP3NoMods0 : ComplexPattern<untyped, 4, "SelectVOP3NoMods0">;
def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
-def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
+def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 2f89503e129a..3f6ddec70479 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -94,6 +94,12 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
+def ATOMIC_FENCE : SPseudoInstSI<
+ (outs), (ins i32imm:$ordering, i32imm:$scope),
+ [(atomic_fence (i32 imm:$ordering), (i32 imm:$scope))],
+ "ATOMIC_FENCE $ordering, $scope"> {
+ let hasSideEffects = 1;
+}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
@@ -111,12 +117,6 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
(ins VSrc_b64:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
-def S_TRAP_PSEUDO : SPseudoInstSI <(outs), (ins i16imm:$simm16)> {
- let hasSideEffects = 1;
- let SALU = 1;
- let usesCustomInserter = 1;
-}
-
let usesCustomInserter = 1, SALU = 1 in {
def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
@@ -400,13 +400,8 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
let Predicates = [isGCN] in {
def : Pat<
- (trap),
- (S_TRAP_PSEUDO TRAPID.LLVM_TRAP)
->;
-
-def : Pat<
- (debugtrap),
- (S_TRAP_PSEUDO TRAPID.LLVM_DEBUG_TRAP)
+ (AMDGPUtrap timm:$trapid),
+ (S_TRAP $trapid)
>;
def : Pat<
@@ -477,8 +472,8 @@ def : Pat <
// fp_to_fp16 patterns
def : Pat <
- (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)))),
- (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, $clamp, $omod)
+ (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
+ (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
def : Pat <
@@ -507,11 +502,11 @@ def : Pat <
multiclass FMADPat <ValueType vt, Instruction inst> {
def : Pat <
- (vt (fmad (VOP3NoMods0 vt:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
- (VOP3NoMods vt:$src1, i32:$src1_modifiers),
- (VOP3NoMods vt:$src2, i32:$src2_modifiers))),
- (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
- $src2_modifiers, $src2, $clamp, $omod)
+ (vt (fmad (VOP3NoMods vt:$src0),
+ (VOP3NoMods vt:$src1),
+ (VOP3NoMods vt:$src2))),
+ (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
+ SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
}
@@ -681,10 +676,9 @@ def : BitConvert <v16f32, v16i32, VReg_512>;
// If denormals are not enabled, it only impacts the compare of the
// inputs. The output result is not flushed.
class ClampPat<Instruction inst, ValueType vt> : Pat <
- (vt (AMDGPUclamp
- (VOP3Mods0Clamp vt:$src0, i32:$src0_modifiers, i32:$omod))),
+ (vt (AMDGPUclamp (VOP3Mods vt:$src0, i32:$src0_modifiers))),
(inst i32:$src0_modifiers, vt:$src0,
- i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, $omod)
+ i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, DSTOMOD.NONE)
>;
def : ClampPat<V_MAX_F32_e64, f32>;
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 8e612d2ddfda..b6a982aee6be 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -25,6 +25,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
TIDReg(AMDGPU::NoRegister),
ScratchRSrcReg(AMDGPU::NoRegister),
ScratchWaveOffsetReg(AMDGPU::NoRegister),
+ FrameOffsetReg(AMDGPU::NoRegister),
+ StackPtrOffsetReg(AMDGPU::NoRegister),
PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
DispatchPtrUserSGPR(AMDGPU::NoRegister),
QueuePtrUserSGPR(AMDGPU::NoRegister),
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 810fb05984c4..dc9f509e60ae 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -88,6 +88,14 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
unsigned ScratchRSrcReg;
unsigned ScratchWaveOffsetReg;
+ // This is the current function's offset, incremented from the kernel's
+ // scratch wave offset register. For an entry function, this is exactly the
+ // same as the ScratchWaveOffsetReg.
+ unsigned FrameOffsetReg;
+
+ // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
+ unsigned StackPtrOffsetReg;
+
// Input registers for non-HSA ABI
unsigned PrivateMemoryPtrUserSGPR;
@@ -364,9 +372,25 @@ public:
return ScratchWaveOffsetReg;
}
+ unsigned getFrameOffsetReg() const {
+ return FrameOffsetReg;
+ }
+
+ void setStackPtrOffsetReg(unsigned Reg) {
+ assert(Reg != AMDGPU::NoRegister && "Should never be unset");
+ StackPtrOffsetReg = Reg;
+ }
+
+ unsigned getStackPtrOffsetReg() const {
+ return StackPtrOffsetReg;
+ }
+
void setScratchWaveOffsetReg(unsigned Reg) {
assert(Reg != AMDGPU::NoRegister && "Should never be unset");
ScratchWaveOffsetReg = Reg;
+
+ // FIXME: Only for entry functions.
+ FrameOffsetReg = ScratchWaveOffsetReg;
}
unsigned getQueuePtrUserSGPR() const {
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 098c67252dd8..8820e294562b 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -146,6 +146,9 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::EXEC);
reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
+ // M0 has to be reserved so that llvm accepts it as a live-in to a block.
+ reserveRegisterTuples(Reserved, AMDGPU::M0);
+
// Reserve the memory aperture registers.
reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
@@ -615,7 +618,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
if (SpillToSMEM && isSGPRClass(RC)) {
// XXX - if private_element_size is larger than 4 it might be useful to be
// able to spill wider vmem spills.
- std::tie(EltSize, ScalarStoreOp) = getSpillEltSize(RC->getSize(), true);
+ std::tie(EltSize, ScalarStoreOp) =
+ getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
}
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
@@ -775,7 +779,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
if (SpillToSMEM && isSGPRClass(RC)) {
// XXX - if private_element_size is larger than 4 it might be useful to be
// able to spill wider vmem spills.
- std::tie(EltSize, ScalarLoadOp) = getSpillEltSize(RC->getSize(), false);
+ std::tie(EltSize, ScalarLoadOp) =
+ getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
}
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
@@ -1038,20 +1043,21 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
// TODO: It might be helpful to have some target specific flags in
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
- switch (RC->getSize()) {
- case 0: return false;
- case 1: return false;
- case 4:
+ unsigned Size = getRegSizeInBits(*RC);
+ if (Size < 32)
+ return false;
+ switch (Size) {
+ case 32:
return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
- case 8:
+ case 64:
return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
- case 12:
+ case 96:
return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
- case 16:
+ case 128:
return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
- case 32:
+ case 256:
return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
- case 64:
+ case 512:
return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
default:
llvm_unreachable("Invalid register class size");
@@ -1060,18 +1066,18 @@ bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
const TargetRegisterClass *SRC) const {
- switch (SRC->getSize()) {
- case 4:
+ switch (getRegSizeInBits(*SRC)) {
+ case 32:
return &AMDGPU::VGPR_32RegClass;
- case 8:
+ case 64:
return &AMDGPU::VReg_64RegClass;
- case 12:
+ case 96:
return &AMDGPU::VReg_96RegClass;
- case 16:
+ case 128:
return &AMDGPU::VReg_128RegClass;
- case 32:
+ case 256:
return &AMDGPU::VReg_256RegClass;
- case 64:
+ case 512:
return &AMDGPU::VReg_512RegClass;
default:
llvm_unreachable("Invalid register class size");
@@ -1080,16 +1086,16 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
const TargetRegisterClass *VRC) const {
- switch (VRC->getSize()) {
- case 4:
+ switch (getRegSizeInBits(*VRC)) {
+ case 32:
return &AMDGPU::SGPR_32RegClass;
- case 8:
+ case 64:
return &AMDGPU::SReg_64RegClass;
- case 16:
+ case 128:
return &AMDGPU::SReg_128RegClass;
- case 32:
+ case 256:
return &AMDGPU::SReg_256RegClass;
- case 64:
+ case 512:
return &AMDGPU::SReg_512RegClass;
default:
llvm_unreachable("Invalid register class size");
@@ -1354,15 +1360,15 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
const TargetRegisterClass *DstRC,
unsigned DstSubReg,
const TargetRegisterClass *NewRC) const {
- unsigned SrcSize = SrcRC->getSize();
- unsigned DstSize = DstRC->getSize();
- unsigned NewSize = NewRC->getSize();
+ unsigned SrcSize = getRegSizeInBits(*SrcRC);
+ unsigned DstSize = getRegSizeInBits(*DstRC);
+ unsigned NewSize = getRegSizeInBits(*NewRC);
// Do not increase size of registers beyond dword, we would need to allocate
// adjacent registers and constraint regalloc more than needed.
// Always allow dword coalescing.
- if (SrcSize <= 4 || DstSize <= 4)
+ if (SrcSize <= 32 || DstSize <= 32)
return true;
return NewSize <= DstSize || NewSize <= SrcSize;
diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td
index b4adbdd1df07..593439c2a3cd 100644
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -530,14 +530,16 @@ class SOPKInstTable <bit is_sopk, string cmpOp = ""> {
class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
opName,
(outs SReg_32:$sdst),
- (ins u16imm:$simm16),
+ (ins s16imm:$simm16),
"$sdst, $simm16",
pattern>;
-class SOPK_SCC <string opName, string base_op = ""> : SOPK_Pseudo <
+class SOPK_SCC <string opName, string base_op, bit isSignExt> : SOPK_Pseudo <
opName,
(outs),
- (ins SReg_32:$sdst, u16imm:$simm16),
+ !if(isSignExt,
+ (ins SReg_32:$sdst, s16imm:$simm16),
+ (ins SReg_32:$sdst, u16imm:$simm16)),
"$sdst, $simm16", []>,
SOPKInstTable<1, base_op>{
let Defs = [SCC];
@@ -546,7 +548,7 @@ class SOPK_SCC <string opName, string base_op = ""> : SOPK_Pseudo <
class SOPK_32TIE <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
opName,
(outs SReg_32:$sdst),
- (ins SReg_32:$src0, u16imm:$simm16),
+ (ins SReg_32:$src0, s16imm:$simm16),
"$sdst, $simm16",
pattern
>;
@@ -575,20 +577,20 @@ let isCompare = 1 in {
// [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
// >;
-def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32">;
-def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32">;
-def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32">;
-def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32">;
-def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32">;
-def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32">;
+def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32", 1>;
+def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32", 1>;
+def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32", 1>;
+def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32", 1>;
+def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32", 1>;
+def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32", 1>;
let SOPKZext = 1 in {
-def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32">;
-def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32">;
-def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32">;
-def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32">;
-def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32">;
-def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32">;
+def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32", 0>;
+def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32", 0>;
+def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32", 0>;
+def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32", 0>;
+def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32", 0>;
+def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32", 0>;
} // End SOPKZext = 1
} // End isCompare = 1
@@ -600,7 +602,7 @@ let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
def S_CBRANCH_I_FORK : SOPK_Pseudo <
"s_cbranch_i_fork",
- (outs), (ins SReg_64:$sdst, u16imm:$simm16),
+ (outs), (ins SReg_64:$sdst, s16imm:$simm16),
"$sdst, $simm16"
>;
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 86095a8e1142..5a3242bed1d0 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -93,6 +93,12 @@ unsigned getVmcntBitWidthHi() { return 2; }
} // end namespace anonymous
namespace llvm {
+
+static cl::opt<bool> EnablePackedInlinableLiterals(
+ "enable-packed-inlinable-literals",
+ cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
+ cl::init(false));
+
namespace AMDGPU {
namespace IsaInfo {
@@ -703,6 +709,9 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
assert(HasInv2Pi);
+ if (!EnablePackedInlinableLiterals)
+ return false;
+
int16_t Lo16 = static_cast<int16_t>(Literal);
int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
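
As a worked example of the packed-literal check above (values chosen for
illustration, and assuming the new enable-packed-inlinable-literals option is
turned on): 0x3C003C00 carries 1.0 in half precision in both 16-bit lanes, so
the two halves compare equal and only the isInlinableLiteral16() test remains.

  #include <cstdint>

  // halvesMatch(0x3C003C00) returns true: both lanes hold 0x3C00 (1.0h).
  static bool halvesMatch(int32_t Literal) {
    int16_t Lo16 = static_cast<int16_t>(Literal);
    int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
    return Lo16 == Hi16;
  }
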
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 4f5711ca9a79..5c9d589e2625 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -905,7 +905,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), Align);
- switch (RC->getSize()) {
+ switch (TRI->getSpillSize(*RC)) {
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::STRi12))
@@ -1103,7 +1103,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), Align);
- switch (RC->getSize()) {
+ switch (TRI->getSpillSize(*RC)) {
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index faf1c631a3a7..28c407f74125 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -105,10 +105,6 @@ public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
- virtual void getNoopForElfTarget(MCInst &NopInst) const {
- getNoopForMachoTarget(NopInst);
- }
-
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is no such opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 70a44eaaceb8..a20887564f44 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -806,7 +806,8 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI,
if (!DstSubReg)
return true;
// Small registers don't frequently cause a problem, so we can coalesce them.
- if (NewRC->getSize() < 32 && DstRC->getSize() < 32 && SrcRC->getSize() < 32)
+ if (getRegSizeInBits(*NewRC) < 256 && getRegSizeInBits(*DstRC) < 256 &&
+ getRegSizeInBits(*SrcRC) < 256)
return true;
auto NewRCWeight =
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index 94b317a8f986..13fb30767c9f 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -35,7 +35,8 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
Type *T) {
EVT VT = TLI.getValueType(DL, T, true);
- if (!VT.isSimple() || VT.isVector())
+ if (!VT.isSimple() || VT.isVector() ||
+ !(VT.isInteger() || VT.isFloatingPoint()))
return false;
unsigned VTSize = VT.getSimpleVT().getSizeInBits();
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index e0aecff2633b..78a9144bd321 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -661,7 +661,6 @@ static bool IsAnAddressOperand(const MachineOperand &MO) {
return false;
case MachineOperand::MO_IntrinsicID:
case MachineOperand::MO_Predicate:
- case MachineOperand::MO_Placeholder:
llvm_unreachable("should not exist post-isel");
}
llvm_unreachable("unhandled machine operand type");
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 70dbe1bc5b95..4f7a0ab4e220 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1960,10 +1960,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
// closest to SP or frame pointer.
assert(RS && "Register scavenging not provided");
- const TargetRegisterClass *RC = &ARM::GPRRegClass;
- RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ const TargetRegisterClass &RC = ARM::GPRRegClass;
+ unsigned Size = TRI->getSpillSize(RC);
+ unsigned Align = TRI->getSpillAlignment(RC);
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
}
}
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 165e9b7378c7..382f881f7741 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3358,8 +3358,12 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
- // FIXME: handle "fence singlethread" more efficiently.
SDLoc dl(Op);
+ ConstantSDNode *ScopeN = cast<ConstantSDNode>(Op.getOperand(2));
+ auto Scope = static_cast<SynchronizationScope>(ScopeN->getZExtValue());
+ if (Scope == SynchronizationScope::SingleThread)
+ return Op;
+
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 cpus can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
@@ -9476,8 +9480,11 @@ AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
- // Don't generate vpaddl+vmovn; we'll match it to vpadd later.
- if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
+ // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
+ // we're using the entire input vector, otherwise there's a size/legality
+ // mismatch somewhere.
+ if (nextIndex != Vec.getValueType().getVectorNumElements() ||
+ Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
return SDValue();
// Create VPADDL node.
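
For context on the LowerATOMIC_FENCE change above: the scope operand is now
inspected and a single-thread fence is returned unchanged, so it can be matched
by the CompilerBarrier pattern added to ARMInstrInfo.td further down rather
than being lowered to a hardware barrier. A hedged IRBuilder sketch of the kind
of fence that takes this path (BB stands in for an assumed insertion block;
headers follow the 4.x tree):

  #include "llvm/IR/IRBuilder.h"

  static void emitSingleThreadFence(llvm::BasicBlock *BB) {
    llvm::IRBuilder<> Builder(BB);
    // Orders only against code on the same thread (e.g. signal handlers),
    // so no DMB is required.
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                        llvm::SynchronizationScope::SingleThread);
  }
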
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 3b3606ef462a..a0e2ac4cbc6f 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -32,8 +32,8 @@ using namespace llvm;
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI() {}
-/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
-void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+/// Return the noop instruction to use for a noop.
+void ARMInstrInfo::getNoop(MCInst &NopInst) const {
if (hasNOP()) {
NopInst.setOpcode(ARM::HINT);
NopInst.addOperand(MCOperand::createImm(0));
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 4b1b7097b18d..c87fb97448c9 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -25,8 +25,8 @@ class ARMInstrInfo : public ARMBaseInstrInfo {
public:
explicit ARMInstrInfo(const ARMSubtarget &STI);
- /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
- void getNoopForMachoTarget(MCInst &NopInst) const override;
+ /// Return the noop instruction to use for a noop.
+ void getNoop(MCInst &NopInst) const override;
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is no such opcode.
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 703e8071b177..9d8ee5c3f9dc 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -5975,3 +5975,10 @@ def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status),
(ins GPR:$addr, GPRPair:$desired, GPRPair:$new),
NoItinerary, []>, Sched<[]>;
}
+
+def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary,
+ [(atomic_fence imm:$ordering, 0)]> {
+ let hasSideEffects = 1;
+ let Size = 0;
+ let AsmString = "@ COMPILER BARRIER";
+}
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index f2f426e86701..8048c758e998 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -953,7 +953,7 @@ let isAdd = 1 in {
/// These opcodes will be converted to the real non-S opcodes by
/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
let hasPostISelHook = 1, Defs = [CPSR] in {
- let isCommutable = 1 in
+ let isCommutable = 1, Uses = [CPSR] in
def tADCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
2, IIC_iALUr,
[(set tGPR:$Rdn, CPSR, (ARMadde tGPR:$Rn, tGPR:$Rm,
@@ -1292,6 +1292,7 @@ def tSUBrr : // A8.6.212
/// These opcodes will be converted to the real non-S opcodes by
/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
let hasPostISelHook = 1, Defs = [CPSR] in {
+ let Uses = [CPSR] in
def tSBCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
2, IIC_iALUr,
[(set tGPR:$Rdn, CPSR, (ARMsube tGPR:$Rn, tGPR:$Rm,
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 816596b85721..1c13d51a468e 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -47,12 +47,9 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
unsigned SrcReg = I.getOperand(1).getReg();
const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
(void)SrcSize;
- assert((DstSize == SrcSize ||
- // Copies are a means to setup initial types, the number of
- // bits may not exactly match.
- (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- DstSize <= SrcSize)) &&
- "Copy with different width?!");
+ // We use copies for trunc, so it's ok for the size of the destination to be
+ // smaller (the higher bits will just be undefined).
+ assert(DstSize <= SrcSize && "Copy with different width?!");
assert((RegBank->getID() == ARM::GPRRegBankID ||
RegBank->getID() == ARM::FPRRegBankID) &&
@@ -294,6 +291,28 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
}
break;
}
+ case G_TRUNC: {
+ // The high bits are undefined, so there's nothing special to do, just
+ // treat it as a copy.
+ auto SrcReg = I.getOperand(1).getReg();
+ auto DstReg = I.getOperand(0).getReg();
+
+ const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+ const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+
+ if (SrcRegBank.getID() != DstRegBank.getID()) {
+ DEBUG(dbgs() << "G_TRUNC operands on different register banks\n");
+ return false;
+ }
+
+ if (SrcRegBank.getID() != ARM::GPRRegBankID) {
+ DEBUG(dbgs() << "G_TRUNC on non-GPR not supported yet\n");
+ return false;
+ }
+
+ I.setDesc(TII.get(COPY));
+ return selectCopy(I, TII, MRI, TRI, RBI);
+ }
case G_ADD:
case G_GEP:
I.setDesc(TII.get(ARM::ADDrr));
@@ -313,6 +332,16 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
}
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
break;
+ case G_SDIV:
+ assert(TII.getSubtarget().hasDivideInARMMode() && "Unsupported operation");
+ I.setDesc(TII.get(ARM::SDIV));
+ MIB.add(predOps(ARMCC::AL));
+ break;
+ case G_UDIV:
+ assert(TII.getSubtarget().hasDivideInARMMode() && "Unsupported operation");
+ I.setDesc(TII.get(ARM::UDIV));
+ MIB.add(predOps(ARMCC::AL));
+ break;
case G_FADD:
if (!selectFAdd(MIB, TII, MRI))
return false;
@@ -332,6 +361,18 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
"Expected constant to live in a GPR");
I.setDesc(TII.get(ARM::MOVi));
MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+
+ auto &Val = I.getOperand(1);
+ if (Val.isCImm()) {
+ if (Val.getCImm()->getBitWidth() > 32)
+ return false;
+ Val.ChangeToImmediate(Val.getCImm()->getZExtValue());
+ }
+
+ if (!Val.isImm()) {
+ return false;
+ }
+
break;
}
case G_STORE:
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index fe9681439e6b..9b86030fdd29 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -13,6 +13,8 @@
#include "ARMLegalizerInfo.h"
#include "ARMSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
@@ -47,6 +49,18 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
for (auto Ty : {s1, s8, s16, s32})
setAction({Op, Ty}, Legal);
+ for (unsigned Op : {G_SDIV, G_UDIV}) {
+ for (auto Ty : {s8, s16})
+ // FIXME: We need WidenScalar here, but in the case of targets with
+ // software division we'll also need Libcall afterwards. Treat as Custom
+ // until we have better support for chaining legalization actions.
+ setAction({Op, Ty}, Custom);
+ if (ST.hasDivideInARMMode())
+ setAction({Op, s32}, Legal);
+ else
+ setAction({Op, s32}, Libcall);
+ }
+
for (unsigned Op : {G_SEXT, G_ZEXT}) {
setAction({Op, s32}, Legal);
for (auto Ty : {s1, s8, s16})
@@ -75,3 +89,48 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
computeTables();
}
+
+bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ using namespace TargetOpcode;
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case G_SDIV:
+ case G_UDIV: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty != LLT::scalar(16) && Ty != LLT::scalar(8))
+ return false;
+
+ // We need to widen to 32 bits and then maybe, if the target requires,
+ // transform into a libcall.
+ LegalizerHelper Helper(MIRBuilder.getMF());
+
+ MachineInstr *NewMI = nullptr;
+ Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) {
+ // Store the new, 32-bit div instruction.
+ if (MI->getOpcode() == G_SDIV || MI->getOpcode() == G_UDIV)
+ NewMI = MI;
+ });
+
+ auto Result = Helper.widenScalar(MI, 0, LLT::scalar(32));
+ Helper.MIRBuilder.stopRecordingInsertions();
+ if (Result == LegalizerHelper::UnableToLegalize) {
+ return false;
+ }
+ assert(NewMI && "Couldn't find widened instruction");
+ assert((NewMI->getOpcode() == G_SDIV || NewMI->getOpcode() == G_UDIV) &&
+ "Unexpected widened instruction");
+ assert(MRI.getType(NewMI->getOperand(0).getReg()).getSizeInBits() == 32 &&
+ "Unexpected type for the widened instruction");
+
+ Result = Helper.legalizeInstrStep(*NewMI);
+ if (Result == LegalizerHelper::UnableToLegalize) {
+ return false;
+ }
+ return true;
+ }
+ }
+}
diff --git a/lib/Target/ARM/ARMLegalizerInfo.h b/lib/Target/ARM/ARMLegalizerInfo.h
index 0b8a608a6bde..a9bdd367737e 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/lib/Target/ARM/ARMLegalizerInfo.h
@@ -24,6 +24,9 @@ class ARMSubtarget;
class ARMLegalizerInfo : public LegalizerInfo {
public:
ARMLegalizerInfo(const ARMSubtarget &ST);
+
+ bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const override;
};
} // End llvm namespace.
#endif
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 0fd98268723a..9e9c1ba6c114 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -211,11 +211,9 @@ void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
.addImm(ARMCC::AL).addReg(0));
MCInst Noop;
- Subtarget->getInstrInfo()->getNoopForElfTarget(Noop);
+ Subtarget->getInstrInfo()->getNoop(Noop);
for (int8_t I = 0; I < NoopsInSledCount; I++)
- {
OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
- }
OutStreamer->EmitLabel(Target);
recordSled(CurSled, MI, Kind);
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index e47bd3a8963e..7325817d446b 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -221,8 +221,11 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_ADD:
case G_SUB:
case G_MUL:
+ case G_SDIV:
+ case G_UDIV:
case G_SEXT:
case G_ZEXT:
+ case G_TRUNC:
case G_GEP:
// FIXME: We're abusing the fact that everything lives in a GPR for now; in
// the real world we would use different mappings.
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 27bff4d75acf..0ebf55924647 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -24,8 +24,8 @@ using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI() {}
-/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
-void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+/// Return the noop instruction to use for a noop.
+void Thumb1InstrInfo::getNoop(MCInst &NopInst) const {
NopInst.setOpcode(ARM::tMOVr);
NopInst.addOperand(MCOperand::createReg(ARM::R8));
NopInst.addOperand(MCOperand::createReg(ARM::R8));
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 931914ad2799..e8d9a9c4ff14 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -25,8 +25,8 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo {
public:
explicit Thumb1InstrInfo(const ARMSubtarget &STI);
- /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
- void getNoopForMachoTarget(MCInst &NopInst) const override;
+ /// Return the noop instruction to use for a noop.
+ void getNoop(MCInst &NopInst) const override;
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is no such opcode.
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 818ba85c7d40..2e2dfe035e26 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -32,8 +32,8 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI() {}
-/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
-void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+/// Return the noop instruction to use for a noop.
+void Thumb2InstrInfo::getNoop(MCInst &NopInst) const {
NopInst.setOpcode(ARM::tHINT);
NopInst.addOperand(MCOperand::createImm(0));
NopInst.addOperand(MCOperand::createImm(ARMCC::AL));
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 15d63300b6a2..c834ba73bfea 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -26,8 +26,8 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo {
public:
explicit Thumb2InstrInfo(const ARMSubtarget &STI);
- /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
- void getNoopForMachoTarget(MCInst &NopInst) const override;
+ /// Return the noop instruction to use for a noop.
+ void getNoop(MCInst &NopInst) const override;
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is no such opcode.
diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp
index 50bb50b44f27..d6491ce5c3bf 100644
--- a/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -112,7 +112,8 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const AVRSubtarget &STI = MF->getSubtarget<AVRSubtarget>();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
- unsigned BytesPerReg = TRI.getMinimalPhysRegClass(Reg)->getSize();
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+ unsigned BytesPerReg = TRI.getRegSizeInBits(*RC) / 8;
assert(BytesPerReg <= 2 && "Only 8 and 16 bit regs are supported.");
unsigned RegIdx = ByteNumber / BytesPerReg;
diff --git a/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 13080a5d72f0..540e05a92997 100644
--- a/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -88,6 +88,9 @@ private:
unsigned ArithOpcode,
Block &MBB,
BlockIt MBBI);
+
+ /// Scavenges a free GPR8 register for use.
+ unsigned scavengeGPR8(MachineInstr &MI);
};
char AVRExpandPseudo::ID = 0;
@@ -577,24 +580,43 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
unsigned OpLo, OpHi, DstLoReg, DstHiReg;
unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned TmpReg = 0; // 0 for no temporary register
unsigned SrcReg = MI.getOperand(1).getReg();
- bool DstIsDead = MI.getOperand(0).isDead();
bool SrcIsKill = MI.getOperand(1).isKill();
OpLo = AVR::LDRdPtr;
OpHi = AVR::LDDRdPtrQ;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
+ // Use a temporary register if src and dst registers are the same.
+ if (DstReg == SrcReg)
+ TmpReg = scavengeGPR8(MI);
+
+ unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg;
+ unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg;
+ // Load low byte.
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(CurDstLoReg, RegState::Define)
.addReg(SrcReg);
+ // Push low byte onto stack if necessary.
+ if (TmpReg)
+ buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg);
+
+ // Load high byte.
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(CurDstHiReg, RegState::Define)
.addReg(SrcReg, getKillRegState(SrcIsKill))
.addImm(1);
+ if (TmpReg) {
+ // Move the high byte into the final destination.
+ buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstHiReg).addReg(TmpReg);
+
+ // Move the low byte from the scratch space into the final destination.
+ buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg);
+ }
+
MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
@@ -669,9 +691,9 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
unsigned OpLo, OpHi, DstLoReg, DstHiReg;
unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned TmpReg = 0; // 0 for no temporary register
unsigned SrcReg = MI.getOperand(1).getReg();
unsigned Imm = MI.getOperand(2).getImm();
- bool DstIsDead = MI.getOperand(0).isDead();
bool SrcIsKill = MI.getOperand(1).isKill();
OpLo = AVR::LDDRdPtrQ;
OpHi = AVR::LDDRdPtrQ;
@@ -679,60 +701,35 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
assert(Imm <= 63 && "Offset is out of range");
- MachineInstr *MIBLO, *MIBHI;
-
- // HACK: We shouldn't have instances of this instruction
- // where src==dest because the instruction itself is
- // marked earlyclobber. We do however get this instruction when
- // loading from stack slots where the earlyclobber isn't useful.
- //
- // In this case, just use a temporary register.
- if (DstReg == SrcReg) {
- RegScavenger RS;
-
- RS.enterBasicBlock(MBB);
- RS.forward(MBBI);
-
- BitVector Candidates =
- TRI->getAllocatableSet
- (*MBB.getParent(), &AVR::GPR8RegClass);
-
- // Exclude all the registers being used by the instruction.
- for (MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() &&
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- Candidates.reset(MO.getReg());
- }
-
- BitVector Available = RS.getRegsAvailable(&AVR::GPR8RegClass);
- Available &= Candidates;
+ // Use a temporary register if src and dst registers are the same.
+ if (DstReg == SrcReg)
+ TmpReg = scavengeGPR8(MI);
- signed TmpReg = Available.find_first();
- assert(TmpReg != -1 && "ran out of registers");
+ unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg;
+ unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg;
- MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(TmpReg, RegState::Define)
- .addReg(SrcReg)
- .addImm(Imm);
+ // Load low byte.
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(CurDstLoReg, RegState::Define)
+ .addReg(SrcReg)
+ .addImm(Imm);
- buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstLoReg).addReg(TmpReg);
+ // Push low byte onto stack if necessary.
+ if (TmpReg)
+ buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg);
- MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(TmpReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill))
- .addImm(Imm + 1);
+ // Load high byte.
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(CurDstHiReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(Imm + 1);
+ if (TmpReg) {
+ // Move the high byte into the final destination.
buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstHiReg).addReg(TmpReg);
- } else {
- MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg)
- .addImm(Imm);
- MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg, getKillRegState(SrcIsKill))
- .addImm(Imm + 1);
+ // Move the low byte from the scratch space into the final destination.
+ buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg);
}
MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
@@ -819,6 +816,32 @@ bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width,
});
}
+unsigned AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ RegScavenger RS;
+
+ RS.enterBasicBlock(MBB);
+ RS.forward(MI);
+
+ BitVector Candidates =
+ TRI->getAllocatableSet
+ (*MBB.getParent(), &AVR::GPR8RegClass);
+
+ // Exclude all the registers being used by the instruction.
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ Candidates.reset(MO.getReg());
+ }
+
+ BitVector Available = RS.getRegsAvailable(&AVR::GPR8RegClass);
+ Available &= Candidates;
+
+ signed Reg = Available.find_first();
+ assert(Reg != -1 && "ran out of registers");
+ return Reg;
+}
+
template<>
bool AVRExpandPseudo::expand<AVR::AtomicLoad8>(Block &MBB, BlockIt MBBI) {
return expandAtomicBinaryOp(AVR::LDRdPtr, MBB, MBBI);
@@ -948,7 +971,6 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
unsigned DstReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(1).getReg();
- bool DstIsKill = MI.getOperand(0).isKill();
bool SrcIsKill = MI.getOperand(1).isKill();
OpLo = AVR::STPtrRr;
OpHi = AVR::STDPtrQRr;
@@ -960,7 +982,7 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
.addReg(SrcLoReg, getKillRegState(SrcIsKill));
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg)
.addImm(1)
.addReg(SrcHiReg, getKillRegState(SrcIsKill));
diff --git a/lib/Target/AVR/AVRFrameLowering.cpp b/lib/Target/AVR/AVRFrameLowering.cpp
index b8cb2215ddb4..ab42a7aa9901 100644
--- a/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/lib/Target/AVR/AVRFrameLowering.cpp
@@ -239,7 +239,7 @@ bool AVRFrameLowering::spillCalleeSavedRegisters(
unsigned Reg = CSI[i - 1].getReg();
bool IsNotLiveIn = !MBB.isLiveIn(Reg);
- assert(TRI->getMinimalPhysRegClass(Reg)->getSize() == 1 &&
+ assert(TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(Reg)) == 8 &&
"Invalid register size");
// Add the callee-saved register as live-in only if it is not already a
@@ -277,7 +277,7 @@ bool AVRFrameLowering::restoreCalleeSavedRegisters(
for (const CalleeSavedInfo &CCSI : CSI) {
unsigned Reg = CCSI.getReg();
- assert(TRI->getMinimalPhysRegClass(Reg)->getSize() == 1 &&
+ assert(TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(Reg)) == 8 &&
"Invalid register size");
BuildMI(MBB, MI, DL, TII.get(AVR::POPRd), Reg);
diff --git a/lib/Target/AVR/AVRInstrInfo.cpp b/lib/Target/AVR/AVRInstrInfo.cpp
index 88f889260cce..afba66b2e69b 100644
--- a/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/lib/Target/AVR/AVRInstrInfo.cpp
@@ -142,9 +142,9 @@ void AVRInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MFI.getObjectAlignment(FrameIndex));
unsigned Opcode = 0;
- if (RC->hasType(MVT::i8)) {
+ if (TRI->isTypeLegalForClass(*RC, MVT::i8)) {
Opcode = AVR::STDPtrQRr;
- } else if (RC->hasType(MVT::i16)) {
+ } else if (TRI->isTypeLegalForClass(*RC, MVT::i16)) {
Opcode = AVR::STDWPtrQRr;
} else {
llvm_unreachable("Cannot store this register into a stack slot!");
@@ -176,9 +176,9 @@ void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MFI.getObjectAlignment(FrameIndex));
unsigned Opcode = 0;
- if (RC->hasType(MVT::i8)) {
+ if (TRI->isTypeLegalForClass(*RC, MVT::i8)) {
Opcode = AVR::LDDRdPtrQ;
- } else if (RC->hasType(MVT::i16)) {
+ } else if (TRI->isTypeLegalForClass(*RC, MVT::i16)) {
// Opcode = AVR::LDDWRdPtrQ;
//:FIXME: remove this once PR13375 gets fixed
Opcode = AVR::LDDWRdYQ;
diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp
index 48798bd4a1da..5cc7eaf8add3 100644
--- a/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -78,11 +78,12 @@ BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
const TargetRegisterClass *
AVRRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &MF) const {
- if (RC->hasType(MVT::i16)) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (TRI->isTypeLegalForClass(*RC, MVT::i16)) {
return &AVR::DREGSRegClass;
}
- if (RC->hasType(MVT::i8)) {
+ if (TRI->isTypeLegalForClass(*RC, MVT::i8)) {
return &AVR::GPR8RegClass;
}
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
index cb3049bf1500..07767d1037a9 100644
--- a/lib/Target/Hexagon/BitTracker.cpp
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -347,7 +347,7 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
unsigned PhysS = (RR.Sub == 0) ? PhysR : TRI.getSubReg(PhysR, RR.Sub);
const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PhysS);
- uint16_t BW = RC->getSize()*8;
+ uint16_t BW = TRI.getRegSizeInBits(*RC);
return BW;
}
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index fda23f8f6b05..c8483f7e6e76 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -286,9 +286,9 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo();
const MachineFunction &MF = *MI.getParent()->getParent();
const auto &HST = MF.getSubtarget<HexagonSubtarget>();
- unsigned VectorSize = HST.useHVXSglOps()
- ? Hexagon::VectorRegsRegClass.getSize()
- : Hexagon::VectorRegs128BRegClass.getSize();
+ const auto &VecRC = HST.useHVXSglOps() ? Hexagon::VectorRegsRegClass
+ : Hexagon::VectorRegs128BRegClass;
+ unsigned VectorSize = HST.getRegisterInfo()->getSpillSize(VecRC);
switch (Inst.getOpcode()) {
default: return;
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 61f290ca98d7..8502bf24c02f 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -407,7 +407,7 @@ bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
if (RR.Sub == 0) {
Begin = 0;
- Width = RC->getSize()*8;
+ Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC);
return true;
}
@@ -417,7 +417,7 @@ bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
case Hexagon::DoubleRegsRegClassID:
case Hexagon::VecDblRegsRegClassID:
case Hexagon::VecDblRegs128BRegClassID:
- Width = RC->getSize()*8 / 2;
+ Width = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 2;
if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi)
Begin = Width;
break;
@@ -1054,8 +1054,8 @@ namespace {
class RedundantInstrElimination : public Transformation {
public:
RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
- MachineRegisterInfo &mri)
- : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
@@ -1070,6 +1070,7 @@ namespace {
bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);
const HexagonInstrInfo &HII;
+ const HexagonRegisterInfo &HRI;
MachineRegisterInfo &MRI;
BitTracker &BT;
};
@@ -1262,7 +1263,7 @@ bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
assert(MI.getOperand(OpN).isReg());
BitTracker::RegisterRef RR = MI.getOperand(OpN);
const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
- uint16_t Width = RC->getSize()*8;
+ uint16_t Width = HRI.getRegSizeInBits(*RC);
if (!GotBits)
T.set(Begin, Begin+Width);
@@ -2173,8 +2174,10 @@ bool BitSimplification::genBitSplit(MachineInstr *MI,
const RegisterSet &AVs) {
if (!GenBitSplit)
return false;
- if (CountBitSplit >= MaxBitSplit)
- return false;
+ if (MaxBitSplit.getNumOccurrences()) {
+ if (CountBitSplit >= MaxBitSplit)
+ return false;
+ }
unsigned Opc = MI->getOpcode();
switch (Opc) {
@@ -2253,7 +2256,8 @@ bool BitSimplification::genBitSplit(MachineInstr *MI,
continue;
// Generate bitsplit where S is defined.
- CountBitSplit++;
+ if (MaxBitSplit.getNumOccurrences())
+ CountBitSplit++;
MachineInstr *DefS = MRI.getVRegDef(S);
assert(DefS != nullptr);
DebugLoc DL = DefS->getDebugLoc();
@@ -2379,9 +2383,11 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
const RegisterSet &AVs) {
if (!GenExtract)
return false;
- if (CountExtract >= MaxExtract)
- return false;
- CountExtract++;
+ if (MaxExtract.getNumOccurrences()) {
+ if (CountExtract >= MaxExtract)
+ return false;
+ CountExtract++;
+ }
unsigned W = RC.width();
unsigned RW = W;
@@ -2651,7 +2657,7 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
Changed |= visitBlock(Entry, ImmG, AIG);
RegisterSet ARE; // Available registers for RIE.
- RedundantInstrElimination RIE(BT, HII, MRI);
+ RedundantInstrElimination RIE(BT, HII, HRI, MRI);
bool Ried = visitBlock(Entry, RIE, ARE);
if (Ried) {
Changed = true;
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index d8ba5dcd35ad..9f8c9ded8127 100644
--- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -559,10 +559,10 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
}
unsigned PhysS = (RS.Sub == 0) ? PhysR : TRI->getSubReg(PhysR, RS.Sub);
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysS);
- switch (RC->getSize()) {
- case 4:
+ switch (TRI->getRegSizeInBits(*RC)) {
+ case 32:
return IfTrue ? A2_tfrt : A2_tfrf;
- case 8:
+ case 64:
return IfTrue ? A2_tfrpt : A2_tfrpf;
}
llvm_unreachable("Invalid register operand");
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 0e2380f4316a..a04aca4afa0f 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -1425,7 +1425,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
if (!SRegs[S->Reg])
continue;
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
- int FI = MFI.CreateFixedSpillStackObject(RC->getSize(), S->Offset);
+ int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset);
MinOffset = std::min(MinOffset, S->Offset);
CSI.push_back(CalleeSavedInfo(S->Reg, FI));
SRegs[S->Reg] = false;
@@ -1437,11 +1437,12 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
unsigned R = x;
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
- int Off = MinOffset - RC->getSize();
- unsigned Align = std::min(RC->getAlignment(), getStackAlignment());
+ unsigned Size = TRI->getSpillSize(*RC);
+ int Off = MinOffset - Size;
+ unsigned Align = std::min(TRI->getSpillAlignment(*RC), getStackAlignment());
assert(isPowerOf2_32(Align));
Off &= -Align;
- int FI = MFI.CreateFixedSpillStackObject(RC->getSize(), Off);
+ int FI = MFI.CreateFixedSpillStackObject(Size, Off);
MinOffset = std::min(MinOffset, Off);
CSI.push_back(CalleeSavedInfo(R, FI));
SRegs[R] = false;
@@ -1677,10 +1678,10 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
int FI = MI->getOperand(0).getIndex();
bool Is128B = HST.useHVXDblOps();
- auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass
- : &Hexagon::VectorRegs128BRegClass;
- unsigned Size = RC->getSize();
- unsigned NeedAlign = RC->getAlignment();
+ const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass
+ : Hexagon::VectorRegs128BRegClass;
+ unsigned Size = HRI.getSpillSize(RC);
+ unsigned NeedAlign = HRI.getSpillAlignment(RC);
unsigned HasAlign = MFI.getObjectAlignment(FI);
unsigned StoreOpc;
@@ -1734,10 +1735,10 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
int FI = MI->getOperand(1).getIndex();
bool Is128B = HST.useHVXDblOps();
- auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass
- : &Hexagon::VectorRegs128BRegClass;
- unsigned Size = RC->getSize();
- unsigned NeedAlign = RC->getAlignment();
+ const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass
+ : Hexagon::VectorRegs128BRegClass;
+ unsigned Size = HRI.getSpillSize(RC);
+ unsigned NeedAlign = HRI.getSpillAlignment(RC);
unsigned HasAlign = MFI.getObjectAlignment(FI);
unsigned LoadOpc;
@@ -1777,16 +1778,16 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
if (!MI->getOperand(0).isFI())
return false;
+ auto &HRI = *HST.getRegisterInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned SrcR = MI->getOperand(2).getReg();
bool IsKill = MI->getOperand(2).isKill();
int FI = MI->getOperand(0).getIndex();
bool Is128B = HST.useHVXDblOps();
- auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass
- : &Hexagon::VectorRegs128BRegClass;
-
- unsigned NeedAlign = RC->getAlignment();
+ const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass
+ : Hexagon::VectorRegs128BRegClass;
+ unsigned NeedAlign = HRI.getSpillAlignment(RC);
unsigned HasAlign = MFI.getObjectAlignment(FI);
unsigned StoreOpc;
@@ -1815,15 +1816,15 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
if (!MI->getOperand(1).isFI())
return false;
+ auto &HRI = *HST.getRegisterInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned DstR = MI->getOperand(0).getReg();
int FI = MI->getOperand(1).getIndex();
bool Is128B = HST.useHVXDblOps();
- auto *RC = !Is128B ? &Hexagon::VectorRegsRegClass
- : &Hexagon::VectorRegs128BRegClass;
-
- unsigned NeedAlign = RC->getAlignment();
+ const auto &RC = !Is128B ? Hexagon::VectorRegsRegClass
+ : Hexagon::VectorRegs128BRegClass;
+ unsigned NeedAlign = HRI.getSpillAlignment(RC);
unsigned HasAlign = MFI.getObjectAlignment(FI);
unsigned LoadOpc;
@@ -1932,7 +1933,7 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (!needToReserveScavengingSpillSlots(MF, HRI, RC))
continue;
unsigned Num = RC == &Hexagon::IntRegsRegClass ? NumberScavengerSlots : 1;
- unsigned S = RC->getSize(), A = RC->getAlignment();
+ unsigned S = HRI.getSpillSize(*RC), A = HRI.getSpillAlignment(*RC);
for (unsigned i = 0; i < Num; i++) {
int NewFI = MFI.CreateSpillStackObject(S, A);
RS->addScavengingFrameIndex(NewFI);
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index b5948475e1f7..1829c5da02a6 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -26,6 +26,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -1206,10 +1207,9 @@ bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
if (!T)
return false;
- unsigned BW = T->getBitWidth();
- APInt K0(BW, 0), K1(BW, 0);
- computeKnownBits(V, K0, K1, DL);
- return K0.countLeadingOnes() >= IterCount;
+ KnownBits Known(T->getBitWidth());
+ computeKnownBits(V, Known, DL);
+ return Known.Zero.countLeadingOnes() >= IterCount;
}
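
The hunk above is part of the migration from paired Zero/One APInts to the
KnownBits struct in llvm/Support/KnownBits.h. A minimal sketch of the new-style
query, mirroring highBitsAreZero() (it assumes V already has integer type):

  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/Support/KnownBits.h"

  static bool topBitsKnownZero(const llvm::Value *V, unsigned IterCount,
                               const llvm::DataLayout &DL) {
    llvm::KnownBits Known(V->getType()->getIntegerBitWidth());
    llvm::computeKnownBits(V, Known, DL);  // fills Known.Zero / Known.One
    return Known.Zero.countLeadingOnes() >= IterCount;
  }
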
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index c0c29b992238..22fc2474fae6 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -122,6 +122,7 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
}
let usesCustomInserter = 1 in {
+ let Uses = [SR] in {
def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc),
"# Select8 PSEUDO",
[(set GR8:$dst,
@@ -130,6 +131,7 @@ let usesCustomInserter = 1 in {
"# Select16 PSEUDO",
[(set GR16:$dst,
(MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>;
+ }
let Defs = [SR] in {
def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Shl8 PSEUDO",
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 2a9d96205eb9..134f7ac3aea3 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -273,9 +273,9 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
// size of stack area to which FP callee-saved regs are saved.
- unsigned CPURegSize = Mips::GPR32RegClass.getSize();
- unsigned FGR32RegSize = Mips::FGR32RegClass.getSize();
- unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize();
+ unsigned CPURegSize = TRI->getRegSizeInBits(Mips::GPR32RegClass) / 8;
+ unsigned FGR32RegSize = TRI->getRegSizeInBits(Mips::FGR32RegClass) / 8;
+ unsigned AFGR64RegSize = TRI->getRegSizeInBits(Mips::AFGR64RegClass) / 8;
bool HasAFGR64Reg = false;
unsigned CSFPRegsSize = 0;
diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp
index 7af988c1f64d..cb9f676c237a 100644
--- a/lib/Target/Mips/MipsCCState.cpp
+++ b/lib/Target/Mips/MipsCCState.cpp
@@ -38,7 +38,7 @@ static bool isF128SoftLibCall(const char *CallSym) {
/// This function returns true if Ty is fp128, {f128} or i128 which was
/// originally a fp128.
-static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) {
+static bool originalTypeIsF128(const Type *Ty, const char *Func) {
if (Ty->isFP128Ty())
return true;
@@ -46,12 +46,9 @@ static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) {
Ty->getStructElementType(0)->isFP128Ty())
return true;
- const ExternalSymbolSDNode *ES =
- dyn_cast_or_null<const ExternalSymbolSDNode>(CallNode);
-
// If the Ty is i128 and the function being called is a long double emulation
// routine, then the original type is f128.
- return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol()));
+ return (Func && Ty->isIntegerTy(128) && isF128SoftLibCall(Func));
}
MipsCCState::SpecialCallingConvType
@@ -73,11 +70,11 @@ MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee,
void MipsCCState::PreAnalyzeCallResultForF128(
const SmallVectorImpl<ISD::InputArg> &Ins,
- const TargetLowering::CallLoweringInfo &CLI) {
+ const Type *RetTy, const char *Call) {
for (unsigned i = 0; i < Ins.size(); ++i) {
OriginalArgWasF128.push_back(
- originalTypeIsF128(CLI.RetTy, CLI.Callee.getNode()));
- OriginalArgWasFloat.push_back(CLI.RetTy->isFloatingPointTy());
+ originalTypeIsF128(RetTy, Call));
+ OriginalArgWasFloat.push_back(RetTy->isFloatingPointTy());
}
}
@@ -99,10 +96,10 @@ void MipsCCState::PreAnalyzeReturnForF128(
void MipsCCState::PreAnalyzeCallOperands(
const SmallVectorImpl<ISD::OutputArg> &Outs,
std::vector<TargetLowering::ArgListEntry> &FuncArgs,
- const SDNode *CallNode) {
+ const char *Func) {
for (unsigned i = 0; i < Outs.size(); ++i) {
OriginalArgWasF128.push_back(
- originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, CallNode));
+ originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, Func));
OriginalArgWasFloat.push_back(
FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy());
CallOperandIsFixed.push_back(Outs[i].IsFixed);
diff --git a/lib/Target/Mips/MipsCCState.h b/lib/Target/Mips/MipsCCState.h
index 081c393a09be..77ecc65b2eee 100644
--- a/lib/Target/Mips/MipsCCState.h
+++ b/lib/Target/Mips/MipsCCState.h
@@ -31,7 +31,7 @@ private:
/// Identify lowered values that originated from f128 arguments and record
/// this for use by RetCC_MipsN.
void PreAnalyzeCallResultForF128(const SmallVectorImpl<ISD::InputArg> &Ins,
- const TargetLowering::CallLoweringInfo &CLI);
+ const Type *RetTy, const char * Func);
/// Identify lowered values that originated from f128 arguments and record
/// this for use by RetCC_MipsN.
@@ -42,7 +42,7 @@ private:
void
PreAnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
std::vector<TargetLowering::ArgListEntry> &FuncArgs,
- const SDNode *CallNode);
+ const char *Func);
/// Identify lowered values that originated from f128 arguments and record
/// this.
@@ -73,8 +73,8 @@ public:
AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn,
std::vector<TargetLowering::ArgListEntry> &FuncArgs,
- const SDNode *CallNode) {
- PreAnalyzeCallOperands(Outs, FuncArgs, CallNode);
+ const char *Func) {
+ PreAnalyzeCallOperands(Outs, FuncArgs, Func);
CCState::AnalyzeCallOperands(Outs, Fn);
OriginalArgWasF128.clear();
OriginalArgWasFloat.clear();
@@ -99,9 +99,9 @@ public:
}
void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
- CCAssignFn Fn,
- const TargetLowering::CallLoweringInfo &CLI) {
- PreAnalyzeCallResultForF128(Ins, CLI);
+ CCAssignFn Fn, const Type *RetTy,
+ const char *Func) {
+ PreAnalyzeCallResultForF128(Ins, RetTy, Func);
CCState::AnalyzeCallResult(Ins, Fn);
OriginalArgWasFloat.clear();
OriginalArgWasF128.clear();
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index c060cf06099d..a5c7bf7699ea 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -1260,8 +1260,10 @@ bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
emitInst(Mips::ADJCALLSTACKUP).addImm(16).addImm(0);
if (RetVT != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
- CCInfo.AnalyzeCallResult(RetVT, RetCC_Mips);
+ MipsCCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
+
+ CCInfo.AnalyzeCallResult(CLI.Ins, RetCC_Mips, CLI.RetTy,
+ CLI.Symbol->getName().data());
// Only handle a single return value.
if (RVLocs.size() != 1)
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index b2cf03976f81..ef05166503b2 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -119,7 +119,7 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
// Conservatively assume all callee-saved registers will be saved.
for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) {
- unsigned Size = TRI.getMinimalPhysRegClass(*R)->getSize();
+ unsigned Size = TRI.getSpillSize(*TRI.getMinimalPhysRegClass(*R));
Offset = alignTo(Offset + Size, Size);
}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 93c5f496ce97..8f39ebd42a5c 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -2750,7 +2750,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// caller side but removing it breaks the frame size calculation.
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
- CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), Callee.getNode());
+ const ExternalSymbolSDNode *ES =
+ dyn_cast_or_null<const ExternalSymbolSDNode>(Callee.getNode());
+ CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(),
+ ES ? ES->getSymbol() : nullptr);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
@@ -2985,7 +2988,11 @@ SDValue MipsTargetLowering::LowerCallResult(
SmallVector<CCValAssign, 16> RVLocs;
MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI);
+
+ const ExternalSymbolSDNode *ES =
+ dyn_cast_or_null<const ExternalSymbolSDNode>(CLI.Callee.getNode());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI.RetTy,
+ ES ? ES->getSymbol() : nullptr);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 5bf4c958c7b9..63034ecab93b 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -40,11 +40,7 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() {
const TargetRegisterClass *RC =
STI.inMips16Mode()
? &Mips::CPU16RegsRegClass
- : STI.inMicroMipsMode()
- ? STI.hasMips64()
- ? &Mips::GPRMM16_64RegClass
- : &Mips::GPRMM16RegClass
- : static_cast<const MipsTargetMachine &>(MF.getTarget())
+ : static_cast<const MipsTargetMachine &>(MF.getTarget())
.getABI()
.IsN64()
? &Mips::GPR64RegClass
@@ -53,14 +49,15 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() {
}
void MipsFunctionInfo::createEhDataRegsFI() {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
for (int I = 0; I < 4; ++I) {
- const TargetRegisterClass *RC =
+ const TargetRegisterClass &RC =
static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsN64()
- ? &Mips::GPR64RegClass
- : &Mips::GPR32RegClass;
+ ? Mips::GPR64RegClass
+ : Mips::GPR32RegClass;
- EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(RC->getSize(),
- RC->getAlignment(), false);
+ EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(TRI.getSpillSize(RC),
+ TRI.getSpillAlignment(RC), false);
}
}
@@ -69,11 +66,12 @@ void MipsFunctionInfo::createISRRegFI() {
// The current implementation only supports Mips32r2+, not Mips64rX. Status
// is always 32 bits; ErrorPC is 32 or 64 bits depending on the architecture,
// but Mips32r2+ is the supported architecture.
- const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ const TargetRegisterClass &RC = Mips::GPR32RegClass;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
for (int I = 0; I < 2; ++I)
ISRDataRegFI[I] = MF.getFrameInfo().CreateStackObject(
- RC->getSize(), RC->getAlignment(), false);
+ TRI.getSpillSize(RC), TRI.getSpillAlignment(RC), false);
}
bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
@@ -93,9 +91,10 @@ MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *GV) {
}
int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
if (MoveF64ViaSpillFI == -1) {
MoveF64ViaSpillFI = MF.getFrameInfo().CreateStackObject(
- RC->getSize(), RC->getAlignment(), false);
+ TRI.getSpillSize(*RC), TRI.getSpillAlignment(*RC), false);
}
return MoveF64ViaSpillFI;
}
diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp
index f33857fe628f..68dcbdfb4211 100644
--- a/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -116,9 +116,10 @@ static MachineOperand *getCallTargetRegOpnd(MachineInstr &MI) {
/// Return type of register Reg.
static MVT::SimpleValueType getRegTy(unsigned Reg, MachineFunction &MF) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
- assert(RC->vt_end() - RC->vt_begin() == 1);
- return *RC->vt_begin();
+ assert(TRI.legalclasstypes_end(*RC) - TRI.legalclasstypes_begin(*RC) == 1);
+ return *TRI.legalclasstypes_begin(*RC);
}
/// Do the following transformation:
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index ef8d18c6deb1..e765b4625206 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -260,7 +260,8 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I,
// copy dst_hi, $vr1
unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
- unsigned VRegSize = RegInfo.getMinimalPhysRegClass(Dst)->getSize() / 2;
+ const TargetRegisterClass *DstRC = RegInfo.getMinimalPhysRegClass(Dst);
+ unsigned VRegSize = RegInfo.getRegSizeInBits(*DstRC) / 16;
const TargetRegisterClass *RC = RegInfo.intRegClass(VRegSize);
unsigned VR0 = MRI.createVirtualRegister(RC);
unsigned VR1 = MRI.createVirtualRegister(RC);
@@ -858,6 +859,7 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
MipsABIInfo ABI = STI.getABI();
unsigned FP = ABI.GetFramePtr();
@@ -883,10 +885,11 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (ExpandPseudo(MF).expand()) {
// The spill slot should be half the size of the accumulator. If the target is
// mips64, it should be 64-bit; otherwise it should be 32-bit.
- const TargetRegisterClass *RC = STI.hasMips64() ?
- &Mips::GPR64RegClass : &Mips::GPR32RegClass;
- int FI = MF.getFrameInfo().CreateStackObject(RC->getSize(),
- RC->getAlignment(), false);
+ const TargetRegisterClass &RC = STI.hasMips64() ?
+ Mips::GPR64RegClass : Mips::GPR32RegClass;
+ int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC),
+ TRI->getSpillAlignment(RC),
+ false);
RS->addScavengingFrameIndex(FI);
}
@@ -897,10 +900,11 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (isInt<16>(MaxSPOffset))
return;
- const TargetRegisterClass *RC =
- ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
- int FI = MF.getFrameInfo().CreateStackObject(RC->getSize(),
- RC->getAlignment(), false);
+ const TargetRegisterClass &RC =
+ ABI.ArePtrs64bit() ? Mips::GPR64RegClass : Mips::GPR32RegClass;
+ int FI = MF.getFrameInfo().CreateStackObject(TRI->getSpillSize(RC),
+ TRI->getSpillAlignment(RC),
+ false);
RS->addScavengingFrameIndex(FI);
}
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index 91e712a7a54e..ee074798563d 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -207,13 +207,16 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = Mips::SDC1;
else if (Mips::FGR64RegClass.hasSubClassEq(RC))
Opc = Mips::SDC164;
- else if (RC->hasType(MVT::v16i8))
+ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8))
Opc = Mips::ST_B;
- else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16))
+ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) ||
+ TRI->isTypeLegalForClass(*RC, MVT::v8f16))
Opc = Mips::ST_H;
- else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32))
+ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) ||
+ TRI->isTypeLegalForClass(*RC, MVT::v4f32))
Opc = Mips::ST_W;
- else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64))
+ else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) ||
+ TRI->isTypeLegalForClass(*RC, MVT::v2f64))
Opc = Mips::ST_D;
else if (Mips::LO32RegClass.hasSubClassEq(RC))
Opc = Mips::SW;
@@ -280,13 +283,16 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = Mips::LDC1;
else if (Mips::FGR64RegClass.hasSubClassEq(RC))
Opc = Mips::LDC164;
- else if (RC->hasType(MVT::v16i8))
+ else if (TRI->isTypeLegalForClass(*RC, MVT::v16i8))
Opc = Mips::LD_B;
- else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16))
+ else if (TRI->isTypeLegalForClass(*RC, MVT::v8i16) ||
+ TRI->isTypeLegalForClass(*RC, MVT::v8f16))
Opc = Mips::LD_H;
- else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32))
+ else if (TRI->isTypeLegalForClass(*RC, MVT::v4i32) ||
+ TRI->isTypeLegalForClass(*RC, MVT::v4f32))
Opc = Mips::LD_W;
- else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64))
+ else if (TRI->isTypeLegalForClass(*RC, MVT::v2i64) ||
+ TRI->isTypeLegalForClass(*RC, MVT::v2f64))
Opc = Mips::LD_D;
else if (Mips::HI32RegClass.hasSubClassEq(RC))
Opc = Mips::LW;
@@ -567,8 +573,8 @@ MipsSEInstrInfo::compareOpndSize(unsigned Opc,
const MCInstrDesc &Desc = get(Opc);
assert(Desc.NumOperands == 2 && "Unary instruction expected.");
const MipsRegisterInfo *RI = &getRegisterInfo();
- unsigned DstRegSize = getRegClass(Desc, 0, RI, MF)->getSize();
- unsigned SrcRegSize = getRegClass(Desc, 1, RI, MF)->getSize();
+ unsigned DstRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 0, RI, MF));
+ unsigned SrcRegSize = RI->getRegSizeInBits(*getRegClass(Desc, 1, RI, MF));
return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize);
}
diff --git a/lib/Target/Mips/Relocation.txt b/lib/Target/Mips/Relocation.txt
new file mode 100644
index 000000000000..f1a6fd8645f6
--- /dev/null
+++ b/lib/Target/Mips/Relocation.txt
@@ -0,0 +1,125 @@
+MIPS Relocation Principles
+
+In LLVM, there are several elements of the llvm::ISD::NodeType enum
+that deal with addresses and/or relocations. These are defined in
+include/llvm/Target/TargetSelectionDAG.td, namely:
+ GlobalAddress, GlobalTLSAddress, JumpTable, ConstantPool,
+ ExternalSymbol, BlockAddress
+The MIPS backend uses several principles to handle these.
+
+1. Code for lowering address references to machine dependent code is
+factored into common code for generating different address forms and
+is called by the relocation model specific lowering function, using
+templated functions. For example:
+
+ // lib/Target/Mips/MipsISelLowering.cpp
+ SDValue MipsTargetLowering::
+ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+
+calls
+
+ template <class NodeTy> // lib/Target/Mips/MipsISelLowering.h
+ SDValue getAddrLocal(NodeTy *N, const SDLoc &DL, EVT Ty,
+ SelectionDAG &DAG, bool IsN32OrN64) const
+
+which calls the overloaded function:
+
+ // lib/Target/Mips/MipsISelLowering.h
+ SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
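+
+Each getTargetNode overload simply wraps its node in the matching
+target-specific node, carrying the requested relocation flag; roughly:
+
+  // sketch only; see the overloads in MipsISelLowering.cpp
+  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);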
+
+2. Generic address nodes are lowered to some combination of target
+independent and machine specific SDNodes (for example:
+MipsISD::{Highest, Higher, Hi, Lo}) depending upon relocation model,
+ABI, and compilation options.
+
+The choice of the specific instructions to use is delegated to ISel,
+which in turn relies on TableGen patterns to choose subtarget-specific
+instructions. For example, in getAddrLocal, the pseudo-code
+generated is:
+
+ (add (load (wrapper $gp, %got(sym))), %lo(sym))
+
+where "%lo" represents an instance of an SDNode with opcode
+"MipsISD::Lo", "wrapper" indicates one with opcode "MipsISD::Wrapper",
+and "%got" the global table pointer "getGlobalReg(...)". The "add" is
+"ISD::ADD", not a target dependent one.
+
+3. A TableGen multiclass pattern "MipsHiLoRelocs" is used to define a
+template pattern parameterized over the load upper immediate
+instruction, add operation, the zero register, and register class.
+Here the instantiation of MipsHiLoRelocs in MipsInstrInfo.td is used
+for MIPS32 to compute addresses for the static relocation model.
+
+ // lib/Target/Mips/MipsInstrInfo.td
+ multiclass MipsHiLoRelocs<Instruction Lui, Instruction Addiu,
+ Register ZeroReg, RegisterOperand GPROpnd> {
+ def : MipsPat<(MipsHi tglobaladdr:$in), (Lui tglobaladdr:$in)>;
+ ...
+ def : MipsPat<(MipsLo tglobaladdr:$in), (Addiu ZeroReg, tglobaladdr:$in)>;
+ ...
+ def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaladdr:$lo)),
+ (Addiu GPROpnd:$hi, tglobaladdr:$lo)>;
+ ...
+ }
+ defm : MipsHiLoRelocs<LUi, ADDiu, ZERO, GPR32Opnd>;
+
+ // lib/Target/Mips/Mips64InstrInfo.td
+ defm : MipsHiLoRelocs<LUi64, DADDiu, ZERO_64, GPR64Opnd>, SYM_32;
+
+The instantiation in Mips64InstrInfo.td is used for MIPS64 in ILP32
+mode, as guarded by the predicate "SYM_32", and also for a submode of
+LP64 where symbols are assumed to be 32 bits wide. A similar
+multiclass for MIPS64 in LP64 mode is also defined:
+
+ // lib/Target/Mips/Mips64InstrInfo.td
+ multiclass MipsHighestHigherHiLoRelocs<Instruction Lui,
+ Instruction Daddiu> {
+ ...
+ def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)),
+ (Lui tglobaladdr:$in)>;
+ ...
+ def : MipsPat<(MipsHigher (i64 tglobaladdr:$in)),
+ (Daddiu ZERO_64, tglobaladdr:$in)>;
+ ...
+ def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaladdr:$lo))),
+ (Daddiu GPR64:$hi, tglobaladdr:$lo)>;
+ ...
+ def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))),
+ (Daddiu GPR64:$hi, tglobaladdr:$lo)>;
+ ...
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))),
+ (Daddiu GPR64:$hi, tglobaladdr:$lo)>;
+ }
+
+and it is instantiated twice:
+
+ // lib/Target/Mips/Mips64InstrInfo.td
+ defm : MipsHighestHigherHiLoRelocs<LUi64, DADDiu>, SYM_64;
+ // lib/Target/Mips/MicroMips64r6InstrInfo.td
+ defm : MipsHighestHigherHiLoRelocs<LUi64, DADDIU_MM64R6>, SYM_64,
+ ISA_MICROMIPS64R6;
+
+These patterns are used during instruction selection to match
+MipsISD::{Highest, Higher, Hi, Lo} to a specific machine instruction
+and operands.
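+
+As a rough illustration, for the static relocation model the MIPS32
+patterns above select the familiar two-instruction sequence (register
+choice is illustrative):
+
+  lui   $2, %hi(sym)        # MipsHi          -> LUi
+  addiu $2, $2, %lo(sym)    # (add, MipsLo)   -> ADDiu
+
+while the MIPS64 LP64 patterns combine the %highest, %higher, %hi and
+%lo parts (together with shifts) to materialize a full 64-bit address.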
+
+More details on how multiclasses in TableGen work can be found in the
+section "Multiclass definitions and instances" in the document
+"TableGen Language Introduction"
+
+4. Instruction definitions are multiply defined to cover the different
+register classes. In some cases, such as LW/LW64, this also accounts
+for the difference in the results of instruction execution. On MIPS32,
+"lw" loads a 32 bit value from memory. On MIPS64, "lw" loads a 32 bit
+value from memory and sign extends the value to 64 bits.
+
+ // lib/Target/Mips/MipsInstrInfo.td
+ def LUi : MMRel, LoadUpper<"lui", GPR32Opnd, uimm16_relaxed>, LUI_FM;
+ // lib/Target/Mips/Mips64InstrInfo.td
+ def LUi64 : LoadUpper<"lui", GPR64Opnd, uimm16_64_relaxed>, LUI_FM;
+
+defines two names "LUi" and "LUi64" with two different register
+classes, but with the same encoding---"LUI_FM". These instructions load a
+16-bit immediate into bits 31-16 and clear the lower 16 bits. On MIPS64,
+the result is sign-extended to 64 bits.
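+
+For example, "lui $2, 0x8000" leaves 0x80000000 in the destination on
+MIPS32, while the MIPS64 "LUi64" form yields the sign-extended value
+0xffffffff80000000 (the register and immediate here are illustrative).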
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 3026f0be242d..0f6c2e53e60a 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -38,7 +38,7 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg);
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
- if (DestRC->getSize() != SrcRC->getSize())
+ if (RegInfo.getRegSizeInBits(*DestRC) != RegInfo.getRegSizeInBits(*SrcRC))
report_fatal_error("Copy one register into another with a different width");
unsigned Op;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 4c9430a2eca0..2a402deccbca 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1898,12 +1898,13 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
- RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
+ const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
+ const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
+ unsigned Size = TRI.getSpillSize(RC);
+ unsigned Align = TRI.getSpillAlignment(RC);
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
// Might we have over-aligned allocas?
bool HasAlVars = MFI.hasVarSizedObjects() &&
@@ -1911,9 +1912,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
// These kinds of spills might need two registers.
if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
- RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
}
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index f7663d8e5185..4659a2ea8032 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9057,6 +9057,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -9070,7 +9071,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
unsigned DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
- assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned restoreDstReg = MRI.createVirtualRegister(RC);
@@ -9153,7 +9154,6 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
- const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 8e159f47ea2e..790a8902b3d2 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -440,8 +440,8 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(Opcode));
}
-/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
-void PPCInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+/// Return the noop instruction to use for a noop.
+void PPCInstrInfo::getNoop(MCInst &NopInst) const {
NopInst.setOpcode(PPC::NOP);
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index f11aed8fa268..b30d09e03ec4 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -269,7 +269,7 @@ public:
///
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
- void getNoopForMachoTarget(MCInst &NopInst) const override;
+ void getNoop(MCInst &NopInst) const override;
std::pair<unsigned, unsigned>
decomposeMachineOperandsTargetFlags(unsigned TF) const override;
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 455d1ee1564a..f120a98e9457 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -3234,6 +3234,7 @@ SparcTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -3245,7 +3246,8 @@ SparcTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
unsigned DstReg = MI.getOperand(0).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
- assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
+ (void)TRI;
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned restoreDstReg = MRI.createVirtualRegister(RC);
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index c8ff9558cc88..fee008b9572a 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -104,8 +104,9 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
MachineOperand &LowOffsetOp = MI->getOperand(2);
LowOffsetOp.setImm(LowOffsetOp.getImm() + 8);
- // Clear the kill flags for the base and index registers in the first
- // instruction.
+ // Clear the kill flags on the registers in the first instruction.
+ if (EarlierMI->getOperand(0).isReg() && EarlierMI->getOperand(0).isUse())
+ EarlierMI->getOperand(0).setIsKill(false);
EarlierMI->getOperand(1).setIsKill(false);
EarlierMI->getOperand(3).setIsKill(false);
@@ -1114,10 +1115,9 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
return nullptr;
unsigned OpNum = Ops[0];
- assert(Size ==
- MF.getRegInfo()
- .getRegClass(MI.getOperand(OpNum).getReg())
- ->getSize() &&
+ assert(Size * 8 ==
+ TRI->getRegSizeInBits(*MF.getRegInfo()
+ .getRegClass(MI.getOperand(OpNum).getReg())) &&
"Invalid size combination");
if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) && OpNum == 0 &&
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index d9c2dba5bace..4178ec0b28f0 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -45,10 +45,11 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
+ const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
const TargetRegisterClass *TRC = MRI->getRegClass(RegNo);
for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64, MVT::v16i8, MVT::v8i16,
MVT::v4i32, MVT::v4f32})
- if (TRC->hasType(T))
+ if (TRI->isTypeLegalForClass(*TRC, T))
return T;
DEBUG(errs() << "Unknown type for register number: " << RegNo);
llvm_unreachable("Unknown register type");
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 324da650e74e..c1cfc82b4a81 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3094,6 +3094,7 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
else if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
else if (IDVal.startswith(".att_syntax")) {
+ getParser().setParsingInlineAsm(false);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "prefix")
Parser.Lex();
@@ -3106,6 +3107,7 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
} else if (IDVal.startswith(".intel_syntax")) {
getParser().setAssemblerDialect(1);
+ getParser().setParsingInlineAsm(true);
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (Parser.getTok().getString() == "noprefix")
Parser.Lex();
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index fdcc7e1ab7b0..19c93cfff0fe 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -95,7 +95,8 @@ void initializeFixupBWInstPassPass(PassRegistry &);
/// encoding when possible in order to reduce code size.
FunctionPass *createX86EvexToVexInsts();
-InstructionSelector *createX86InstructionSelector(X86Subtarget &,
+InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
+ X86Subtarget &,
X86RegisterBankInfo &);
void initializeEvexToVexInstPassPass(PassRegistry &);
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 8fcc8e31d5d4..d2f650cf8f47 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -273,6 +273,16 @@ def FeatureFastSHLDRotate
"fast-shld-rotate", "HasFastSHLDRotate", "true",
"SHLD can be used as a faster rotate">;
+// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
+// "string operations"). See "REP String Enhancement" in the Intel Software
+// Developer's Manual. This feature essentially means that REP MOVSB will copy
+// using the largest available size instead of copying bytes one by one, making
+// it at least as fast as REPMOVS{W,D,Q}.
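+// With ERMSB a memcpy of N bytes can therefore be lowered to a single
+// "rep movsb" with RCX = N, rather than "rep movsq" plus extra code for the
+// N % 8 trailing bytes (see X86SelectionDAGInfo::EmitTargetCodeForMemcpy).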
+def FeatureERMSB
+ : SubtargetFeature<
+ "ermsb", "HasERMSB", "true",
+ "REP MOVS/STOS are fast">;
+
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
@@ -498,6 +508,7 @@ def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
FeatureAVX2,
FeatureBMI,
FeatureBMI2,
+ FeatureERMSB,
FeatureFMA,
FeatureLZCNT,
FeatureMOVBE,
diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp
index 137ef166aaeb..161bfa7b5474 100644
--- a/lib/Target/X86/X86CallLowering.cpp
+++ b/lib/Target/X86/X86CallLowering.cpp
@@ -53,7 +53,6 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
return;
}
- SmallVector<uint64_t, 4> BitOffsets;
SmallVector<unsigned, 8> SplitRegs;
EVT PartVT = TLI.getRegisterType(Context, VT);
@@ -64,8 +63,10 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*PartTy, DL)),
PartTy, OrigArg.Flags};
SplitArgs.push_back(Info);
- PerformArgSplit(Info.Reg, PartVT.getSizeInBits() * i);
+ SplitRegs.push_back(Info.Reg);
}
+
+ PerformArgSplit(SplitRegs);
}
namespace {
@@ -112,10 +113,9 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
SmallVector<ArgInfo, 8> SplitArgs;
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](unsigned Reg, uint64_t Offset) {
- MIRBuilder.buildExtract(Reg, VReg, Offset);
- });
+ splitToValueTypes(
+ OrigArg, SplitArgs, DL, MRI,
+ [&](ArrayRef<unsigned> Regs) { MIRBuilder.buildUnmerge(Regs, VReg); });
FuncReturnHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
@@ -183,22 +183,10 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
for (auto &Arg : F.args()) {
ArgInfo OrigArg(VRegs[Idx], Arg.getType());
setArgFlags(OrigArg, Idx + 1, DL, F);
- LLT Ty = MRI.getType(VRegs[Idx]);
- unsigned Dst = VRegs[Idx];
- bool Split = false;
splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](unsigned Reg, uint64_t Offset) {
- if (!Split) {
- Split = true;
- Dst = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildUndef(Dst);
- }
- unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
- MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
- Dst = Tmp;
+ [&](ArrayRef<unsigned> Regs) {
+ MIRBuilder.buildMerge(VRegs[Idx], Regs);
});
- if (Dst != VRegs[Idx])
- MIRBuilder.buildCopy(VRegs[Idx], Dst);
Idx++;
}
diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h
index 204e6974c702..8a8afb568298 100644
--- a/lib/Target/X86/X86CallLowering.h
+++ b/lib/Target/X86/X86CallLowering.h
@@ -34,14 +34,15 @@ public:
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
+
private:
/// A function of this type is used to perform value split action.
- typedef std::function<void(unsigned, uint64_t)> SplitArgTy;
+ typedef std::function<void(ArrayRef<unsigned>)> SplitArgTy;
void splitToValueTypes(const ArgInfo &OrigArgInfo,
SmallVectorImpl<ArgInfo> &SplitArgs,
const DataLayout &DL, MachineRegisterInfo &MRI,
SplitArgTy SplitArg) const;
};
-} // End of namespace llvm;
+} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 036f5d2610e4..b8477810b4c9 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -2149,7 +2149,8 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
if (!LHSReg || !RHSReg)
return false;
- unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
+ const TargetRegisterInfo &TRI = *Subtarget->getRegisterInfo();
+ unsigned Opc = X86::getCMovFromCond(CC, TRI.getRegSizeInBits(*RC)/8);
unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
LHSReg, LHSIsKill);
updateValueMap(I, ResultReg);
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 8678a13b95d0..a94045cd536d 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1783,6 +1783,14 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
return Offset + FPDelta;
}
+int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF,
+ int FI, unsigned &FrameReg,
+ int Adjustment) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ FrameReg = TRI->getStackRegister();
+ return MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + Adjustment;
+}
+
int
X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
int FI, unsigned &FrameReg,
@@ -1839,9 +1847,6 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
"we don't handle this case!");
- // Fill in FrameReg output argument.
- FrameReg = TRI->getStackRegister();
-
// This is how the math works out:
//
// %rsp grows (i.e. gets lower) left to right. Each box below is
@@ -1866,12 +1871,8 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
// (C - E) == (C - A) - (B - A) + (B - E)
// { Using [1], [2] and [3] above }
// == getObjectOffset - LocalAreaOffset + StackSize
- //
-
- // Get the Offset from the StackPointer
- int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
- return Offset + StackSize;
+ return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
}
bool X86FrameLowering::assignCalleeSavedSpillSlots(
@@ -1923,14 +1924,15 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
continue;
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ unsigned Size = TRI->getSpillSize(*RC);
+ unsigned Align = TRI->getSpillAlignment(*RC);
// ensure alignment
- SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
+ SpillSlotOffset -= std::abs(SpillSlotOffset) % Align;
// spill into slot
- SpillSlotOffset -= RC->getSize();
- int SlotIndex =
- MFI.CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
+ SpillSlotOffset -= Size;
+ int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
CSI[i - 1].setFrameIdx(SlotIndex);
- MFI.ensureMaxAlignment(RC->getAlignment());
+ MFI.ensureMaxAlignment(Align);
}
return true;
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 863dc8b22968..7d214cabad53 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -100,6 +100,8 @@ public:
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
+ int getFrameIndexReferenceSP(const MachineFunction &MF,
+ int FI, unsigned &SPReg, int Adjustment) const;
int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI,
unsigned &FrameReg,
bool IgnoreSPUpdates) const override;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b5f29fb400ef..ada46643a5fe 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5060,8 +5060,8 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
// If the input is a buildvector just emit a smaller one.
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
- return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
- makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
+ return DAG.getBuildVector(
+ ResultVT, dl, makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
@@ -14424,8 +14424,8 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
// If the input is a buildvector just emit a smaller one.
unsigned ElemsPerChunk = ResVT.getVectorNumElements();
if (In.getOpcode() == ISD::BUILD_VECTOR)
- return DAG.getNode(ISD::BUILD_VECTOR, dl, ResVT,
- makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk));
+ return DAG.getBuildVector(
+ ResVT, dl, makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk));
// Everything else is legal.
return Op;
@@ -25944,6 +25944,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
@@ -25960,7 +25961,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
DstReg = MI.getOperand(CurOp++).getReg();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
- assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
+ (void)TRI;
unsigned mainDstReg = MRI.createVirtualRegister(RC);
unsigned restoreDstReg = MRI.createVirtualRegister(RC);
@@ -30207,7 +30209,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
DCI.isBeforeLegalizeOps());
- if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
+ if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) ||
TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne,
TLO)) {
// If we changed the computation somewhere in the DAG, this change will
@@ -33777,7 +33779,7 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
+ if (TLI.ShrinkDemandedConstant(Op1, DemandedMask, TLO) ||
TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
DCI.CommitTargetLoweringOpt(TLO);
}
@@ -35937,7 +35939,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// type. For example, we want to map "{ax},i32" -> {eax}, we don't want it to
// turn into {ax},{dx}.
// MVT::Other is used to specify clobber names.
- if (Res.second->hasType(VT) || VT == MVT::Other)
+ if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other)
return Res; // Correct type already, nothing to do.
// Get a matching integer of the correct size. i.e. "ax" with MVT::32 should
@@ -35975,11 +35977,11 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
Res.second = &X86::FR32RegClass;
else if (VT == MVT::f64 || VT == MVT::i64)
Res.second = &X86::FR64RegClass;
- else if (X86::VR128RegClass.hasType(VT))
+ else if (TRI->isTypeLegalForClass(X86::VR128RegClass, VT))
Res.second = &X86::VR128RegClass;
- else if (X86::VR256RegClass.hasType(VT))
+ else if (TRI->isTypeLegalForClass(X86::VR256RegClass, VT))
Res.second = &X86::VR256RegClass;
- else if (X86::VR512RegClass.hasType(VT))
+ else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT))
Res.second = &X86::VR512RegClass;
else {
// Type mismatch and not a clobber: Return an error;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index bfd21c062aa2..66382014f6e8 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -989,10 +989,12 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
}
} // Constraints = "$src1 = $dst"
- def NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
- def NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
- def NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
- def NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+ let mayLoad = 1, mayStore = 1 in {
+ def NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
+ def NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
+ def NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
+ def NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
+ }
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 7b456fd68343..26444dd1f619 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -6284,9 +6284,11 @@ void X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
unsigned FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ const TargetRegisterClass &RC = *MRI.getRegClass(DstReg);
assert(Cond.size() == 1 && "Invalid Cond array");
unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
- MRI.getRegClass(DstReg)->getSize(),
+ TRI.getRegSizeInBits(RC) / 8,
false /*HasMemoryOperand*/);
BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
}
@@ -6557,7 +6559,7 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
bool HasAVX512 = STI.hasAVX512();
bool HasVLX = STI.hasVLX();
- switch (RC->getSize()) {
+ switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
default:
llvm_unreachable("Unknown spill size");
case 1:
@@ -6603,28 +6605,36 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
return load ? X86::LD_Fp80m : X86::ST_FpP80m;
case 16: {
- assert(X86::VR128XRegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
- // If stack is realigned we can use aligned stores.
- if (isStackAligned)
- return load ?
- (HasVLX ? X86::VMOVAPSZ128rm :
- HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX :
- HasAVX ? X86::VMOVAPSrm :
- X86::MOVAPSrm):
- (HasVLX ? X86::VMOVAPSZ128mr :
- HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX :
- HasAVX ? X86::VMOVAPSmr :
- X86::MOVAPSmr);
- else
- return load ?
- (HasVLX ? X86::VMOVUPSZ128rm :
- HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX :
- HasAVX ? X86::VMOVUPSrm :
- X86::MOVUPSrm):
- (HasVLX ? X86::VMOVUPSZ128mr :
- HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX :
- HasAVX ? X86::VMOVUPSmr :
- X86::MOVUPSmr);
+ if (X86::VR128XRegClass.hasSubClassEq(RC)) {
+ // If stack is realigned we can use aligned stores.
+ if (isStackAligned)
+ return load ?
+ (HasVLX ? X86::VMOVAPSZ128rm :
+ HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX :
+ HasAVX ? X86::VMOVAPSrm :
+ X86::MOVAPSrm):
+ (HasVLX ? X86::VMOVAPSZ128mr :
+ HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX :
+ HasAVX ? X86::VMOVAPSmr :
+ X86::MOVAPSmr);
+ else
+ return load ?
+ (HasVLX ? X86::VMOVUPSZ128rm :
+ HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX :
+ HasAVX ? X86::VMOVUPSrm :
+ X86::MOVUPSrm):
+ (HasVLX ? X86::VMOVUPSZ128mr :
+ HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX :
+ HasAVX ? X86::VMOVUPSmr :
+ X86::MOVUPSmr);
+ }
+ if (X86::BNDRRegClass.hasSubClassEq(RC)) {
+ if (STI.is64Bit())
+ return load ? X86::BNDMOVRM64rm : X86::BNDMOVMR64mr;
+ else
+ return load ? X86::BNDMOVRM32rm : X86::BNDMOVMR32mr;
+ }
+ llvm_unreachable("Unknown 16-byte regclass");
}
case 32:
assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
@@ -6709,9 +6719,9 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
- assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= RC->getSize() &&
+ assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= TRI->getSpillSize(*RC) &&
"Stack slot too small for store");
- unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
+ unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
bool isAligned =
(Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
@@ -6728,7 +6738,8 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
bool isAligned = MMOBegin != MMOEnd &&
(*MMOBegin)->getAlignment() >= Alignment;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
@@ -6748,7 +6759,7 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
- unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
+ unsigned Alignment = std::max<uint32_t>(TRI->getSpillSize(*RC), 16);
bool isAligned =
(Subtarget.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
@@ -6763,7 +6774,8 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
bool isAligned = MMOBegin != MMOEnd &&
(*MMOBegin)->getAlignment() >= Alignment;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
@@ -7222,7 +7234,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
NewOpc = getSETFromCond(NewCC, HasMemoryOperand);
else {
unsigned DstReg = Instr.getOperand(0).getReg();
- NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(),
+ const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
+ NewOpc = getCMovFromCond(NewCC, TRI->getRegSizeInBits(*DstRC)/8,
HasMemoryOperand);
}
@@ -7750,7 +7763,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
unsigned DstIdx = (Imm >> 4) & 3;
unsigned SrcIdx = (Imm >> 6) & 3;
- unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
+ unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
if (Size <= RCSize && 4 <= Align) {
int PtrOffset = SrcIdx * 4;
unsigned NewImm = (DstIdx << 4) | ZMask;
@@ -7772,7 +7787,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
// To fold the load, adjust the pointer to the upper and use (V)MOVLPS.
// TODO: In most cases AVX doesn't have a 8-byte alignment requirement.
if (OpNum == 2) {
- unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum, &RI, MF);
+ unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
if (Size <= RCSize && 8 <= Align) {
unsigned NewOpCode =
(MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm :
@@ -7861,7 +7878,10 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
return nullptr;
bool NarrowToMOV32rm = false;
if (Size) {
- unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC = getRegClass(MI.getDesc(), OpNum,
+ &RI, MF);
+ unsigned RCSize = TRI.getRegSizeInBits(*RC) / 8;
if (Size < RCSize) {
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
@@ -8302,11 +8322,13 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
const MachineFunction &MF) {
unsigned Opc = LoadMI.getOpcode();
unsigned UserOpc = UserMI.getOpcode();
- unsigned RegSize =
- MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg())->getSize();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetRegisterClass *RC =
+ MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg());
+ unsigned RegSize = TRI.getRegSizeInBits(*RC);
if ((Opc == X86::MOVSSrm || Opc == X86::VMOVSSrm || Opc == X86::VMOVSSZrm) &&
- RegSize > 4) {
+ RegSize > 32) {
// These instructions only load 32 bits, we can't fold them if the
// destination register is wider than 32 bits (4 bytes), and its user
// instruction isn't scalar (SS).
@@ -8357,7 +8379,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
}
if ((Opc == X86::MOVSDrm || Opc == X86::VMOVSDrm || Opc == X86::VMOVSDZrm) &&
- RegSize > 8) {
+ RegSize > 64) {
// These instructions only load 64 bits, we can't fold them if the
// destination register is wider than 64 bits (8 bytes), and its user
// instruction isn't scalar (SD).
@@ -8702,6 +8724,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
bool FoldedStore = I->second.second & TB_FOLDED_STORE;
const MCInstrDesc &MCID = get(Opc);
MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
unsigned NumDefs = MCID.NumDefs;
std::vector<SDValue> AddrOps;
@@ -8724,7 +8747,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
// Emit the load instruction.
SDNode *Load = nullptr;
if (FoldedLoad) {
- EVT VT = *RC->vt_begin();
+ EVT VT = *TRI.legalclasstypes_begin(*RC);
std::pair<MachineInstr::mmo_iterator,
MachineInstr::mmo_iterator> MMOs =
MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
@@ -8736,7 +8759,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
- unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
+ unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
@@ -8752,7 +8775,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
const TargetRegisterClass *DstRC = nullptr;
if (MCID.getNumDefs() > 0) {
DstRC = getRegClass(MCID, 0, &RI, MF);
- VTs.push_back(*DstRC->vt_begin());
+ VTs.push_back(*TRI.legalclasstypes_begin(*DstRC));
}
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
@@ -8781,7 +8804,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
- unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
+ unsigned Alignment = std::max<uint32_t>(TRI.getSpillSize(*RC), 16);
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
SDNode *Store =
@@ -9514,7 +9537,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
}
/// Return the noop instruction to use for a noop.
-void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+void X86InstrInfo::getNoop(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 2fee48570ce1..38567831b3a4 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -457,7 +457,7 @@ public:
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const override;
- void getNoopForMachoTarget(MCInst &NopInst) const override;
+ void getNoop(MCInst &NopInst) const override;
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index e31d2769047b..c3def461afdc 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -897,6 +897,7 @@ def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
+def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index e1bf28cbf612..f22a50200c9a 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4602,17 +4602,17 @@ def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "movq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
let isCodeGenOnly = 1 in
def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
} // ExeDomain = SSEPackedInt
@@ -4681,7 +4681,7 @@ def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
VEX;
def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "movq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (extractelt (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>;
@@ -4694,7 +4694,7 @@ def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs),
[], IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "movq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt
@@ -4721,7 +4721,7 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
[(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "movq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
@@ -4811,12 +4811,12 @@ let Predicates = [UseSSE2] in {
}
}
-// These are the correct encodings of the instructions so that we know how to
-// read correct assembly, even though we continue to emit the wrong ones for
-// compatibility with Darwin's buggy assembler.
-def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of
+// "movq" due to MacOS parsing limitation. In order to parse old assembly, we add
+// these aliases.
+def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
(MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
-def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
(MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
@@ -7144,33 +7144,37 @@ let Predicates = [UseSSE41] in {
/// SS42I_binop_rm - Simple SSE 4.2 binary operator
multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, bit Is2Addr = 1> {
+ X86MemOperand x86memop, OpndItins itins,
+ bit Is2Addr = 1> {
def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>;
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, Sched<[itins.Sched]>;
def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>;
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
- loadv2i64, i128mem, 0>, VEX_4V, VEX_WIG;
+ loadv2i64, i128mem, SSE_INTALU_ITINS_P, 0>,
+ VEX_4V, VEX_WIG;
let Predicates = [HasAVX2] in
defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
- loadv4i64, i256mem, 0>, VEX_4V, VEX_L, VEX_WIG;
+ loadv4i64, i256mem, SSE_INTALU_ITINS_P, 0>,
+ VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, SSE_INTALU_ITINS_P>;
//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index fb9315792892..d0f1b7091da9 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -39,11 +39,16 @@ using namespace llvm;
namespace {
+#define GET_GLOBALISEL_PREDICATE_BITSET
+#include "X86GenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATE_BITSET
+
class X86InstructionSelector : public InstructionSelector {
public:
- X86InstructionSelector(const X86Subtarget &STI,
+ X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI,
const X86RegisterBankInfo &RBI);
+ void beginFunction(const MachineFunction &MF) override;
bool select(MachineInstr &I) const override;
private:
@@ -70,10 +75,17 @@ private:
bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
+ const X86TargetMachine &TM;
const X86Subtarget &STI;
const X86InstrInfo &TII;
const X86RegisterInfo &TRI;
const X86RegisterBankInfo &RBI;
+ bool OptForSize;
+ bool OptForMinSize;
+
+ PredicateBitset AvailableFeatures;
+ PredicateBitset computeAvailableFeatures(const MachineFunction *MF,
+ const X86Subtarget *Subtarget) const;
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "X86GenGlobalISel.inc"
@@ -86,10 +98,12 @@ private:
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
-X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI,
+X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM,
+ const X86Subtarget &STI,
const X86RegisterBankInfo &RBI)
- : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI)
+ : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI), OptForSize(false),
+ OptForMinSize(false), AvailableFeatures()
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
@@ -181,6 +195,12 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
return true;
}
+void X86InstructionSelector::beginFunction(const MachineFunction &MF) {
+ OptForSize = MF.getFunction()->optForSize();
+ OptForMinSize = MF.getFunction()->optForMinSize();
+ AvailableFeatures = computeAvailableFeatures(&MF, &STI);
+}
+
bool X86InstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -571,7 +591,8 @@ bool X86InstructionSelector::selectTrunc(MachineInstr &I,
}
InstructionSelector *
-llvm::createX86InstructionSelector(X86Subtarget &Subtarget,
+llvm::createX86InstructionSelector(const X86TargetMachine &TM,
+ X86Subtarget &Subtarget,
X86RegisterBankInfo &RBI) {
- return new X86InstructionSelector(Subtarget, RBI);
+ return new X86InstructionSelector(TM, Subtarget, RBI);
}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 9bab9a4cf3ba..1f16f3c9a14d 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -137,25 +137,29 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
case X86::FR32RegClassID:
case X86::FR64RegClassID:
// If AVX-512 isn't supported we should only inflate to these classes.
- if (!Subtarget.hasAVX512() && Super->getSize() == RC->getSize())
+ if (!Subtarget.hasAVX512() &&
+ getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
return Super;
break;
case X86::VR128RegClassID:
case X86::VR256RegClassID:
// If VLX isn't supported we should only inflate to these classes.
- if (!Subtarget.hasVLX() && Super->getSize() == RC->getSize())
+ if (!Subtarget.hasVLX() &&
+ getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
return Super;
break;
case X86::VR128XRegClassID:
case X86::VR256XRegClassID:
     // If VLX isn't supported we shouldn't inflate to these classes.
- if (Subtarget.hasVLX() && Super->getSize() == RC->getSize())
+ if (Subtarget.hasVLX() &&
+ getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
return Super;
break;
case X86::FR32XRegClassID:
case X86::FR64XRegClassID:
     // If AVX-512 isn't supported we shouldn't inflate to these classes.
- if (Subtarget.hasAVX512() && Super->getSize() == RC->getSize())
+ if (Subtarget.hasAVX512() &&
+ getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
return Super;
break;
case X86::GR8RegClassID:
@@ -168,7 +172,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
case X86::VR512RegClassID:
// Don't return a super-class that would shrink the spill size.
// That can happen with the vector and float classes.
- if (Super->getSize() == RC->getSize())
+ if (getRegSizeInBits(*Super) == getRegSizeInBits(*RC))
return Super;
}
Super = *I++;
@@ -669,32 +673,28 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MI.getParent()->getParent();
const X86FrameLowering *TFI = getFrameLowering(MF);
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- unsigned BasePtr;
- unsigned Opc = MI.getOpcode();
- bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm ||
- Opc == X86::TCRETURNmi || Opc == X86::TCRETURNmi64;
-
- if (hasBasePointer(MF))
- BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister());
- else if (needsStackRealignment(MF))
- BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
- else if (AfterFPPop)
- BasePtr = StackPtr;
- else
- BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
+ // Determine base register and offset.
+ int FIOffset;
+ unsigned BasePtr;
+ if (MI.isReturn()) {
+ assert((!needsStackRealignment(MF) ||
+ MF.getFrameInfo().isFixedObjectIndex(FrameIndex)) &&
+ "Return instruction can only reference SP relative frame objects");
+ FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0);
+ } else {
+ FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr);
+ }
// LOCAL_ESCAPE uses a single offset, with no register. It only works in the
// simple FP case, and doesn't work with stack realignment. On 32-bit, the
// offset is from the traditional base pointer location. On 64-bit, the
// offset is from the SP at the end of the prologue, not the FP location. This
// matches the behavior of llvm.frameaddress.
- unsigned IgnoredFrameReg;
+ unsigned Opc = MI.getOpcode();
if (Opc == TargetOpcode::LOCAL_ESCAPE) {
MachineOperand &FI = MI.getOperand(FIOperandNum);
- int Offset;
- Offset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
- FI.ChangeToImmediate(Offset);
+ FI.ChangeToImmediate(FIOffset);
return;
}
@@ -710,15 +710,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// FrameIndex with base register. Add an offset to the offset.
MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);
- // Now add the frame object offset to the offset from EBP.
- int FIOffset;
- if (AfterFPPop) {
- // Tail call jmp happens after FP is popped.
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- FIOffset = MFI.getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
- } else
- FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
-
if (BasePtr == StackPtr)
FIOffset += SPAdj;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 9da8a18965ea..1a72a0ba3a64 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -106,7 +106,6 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
SDValue Count;
ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
unsigned BytesLeft = 0;
- bool TwoRepStos = false;
if (ValC) {
unsigned ValReg;
uint64_t Val = ValC->getZExtValue() & 255;
@@ -163,20 +162,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
- if (TwoRepStos) {
- InFlag = Chain.getValue(1);
- Count = Size;
- EVT CVT = Count.getValueType();
- SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
- DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl,
- CVT));
- Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
- Left, InFlag);
- InFlag = Chain.getValue(1);
- Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
- Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
- } else if (BytesLeft) {
+ if (BytesLeft) {
// Handle the last 1 - 7 bytes.
unsigned Offset = SizeVal - BytesLeft;
EVT AddrVT = Dst.getValueType();
@@ -195,6 +181,24 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
return Chain;
}
+namespace {
+
+// Represents a cover of a buffer of SizeVal bytes with blocks of size
+// AVT, as well as how many bytes remain (BytesLeft is always smaller than
+// the block size).
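+// For example, SizeVal = 19 with AVT = MVT::i64 gives Count = 2 and
+// BytesLeft = 3.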
+struct RepMovsRepeats {
+ RepMovsRepeats(const uint64_t SizeVal, const MVT& AVT) {
+ const unsigned UBytes = AVT.getSizeInBits() / 8;
+ Count = SizeVal / UBytes;
+ BytesLeft = SizeVal % UBytes;
+ }
+
+ unsigned Count;
+ unsigned BytesLeft;
+};
+
+} // namespace
+
SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
@@ -229,7 +233,12 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
return SDValue();
MVT AVT;
- if (Align & 1)
+ if (Subtarget.hasERMSB())
+ // If the target has enhanced REPMOVSB, then it's at least as fast to use
+ // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
+ // BytesLeft.
+ AVT = MVT::i8;
+ else if (Align & 1)
AVT = MVT::i8;
else if (Align & 2)
AVT = MVT::i16;
@@ -240,14 +249,18 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
// QWORD aligned
AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
- unsigned UBytes = AVT.getSizeInBits() / 8;
- unsigned CountVal = SizeVal / UBytes;
- SDValue Count = DAG.getIntPtrConstant(CountVal, dl);
- unsigned BytesLeft = SizeVal % UBytes;
+ RepMovsRepeats Repeats(SizeVal, AVT);
+ if (Repeats.BytesLeft > 0 &&
+ DAG.getMachineFunction().getFunction()->optForMinSize()) {
+    // When aggressively optimizing for size, avoid generating the code to handle
+ // BytesLeft.
+ AVT = MVT::i8;
+ Repeats = RepMovsRepeats(SizeVal, AVT);
+ }
SDValue InFlag;
Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
- Count, InFlag);
+ DAG.getIntPtrConstant(Repeats.Count, dl), InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
Dst, InFlag);
@@ -262,9 +275,9 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
SmallVector<SDValue, 4> Results;
Results.push_back(RepMovs);
- if (BytesLeft) {
+ if (Repeats.BytesLeft) {
// Handle the last 1 - 7 bytes.
- unsigned Offset = SizeVal - BytesLeft;
+ unsigned Offset = SizeVal - Repeats.BytesLeft;
EVT DstVT = Dst.getValueType();
EVT SrcVT = Src.getValueType();
EVT SizeVT = Size.getValueType();
@@ -275,7 +288,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
DAG.getNode(ISD::ADD, dl, SrcVT, Src,
DAG.getConstant(Offset, dl,
SrcVT)),
- DAG.getConstant(BytesLeft, dl, SizeVT),
+ DAG.getConstant(Repeats.BytesLeft, dl,
+ SizeVT),
Align, isVolatile, AlwaysInline, false,
DstPtrInfo.getWithOffset(Offset),
SrcPtrInfo.getWithOffset(Offset)));
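For readers less familiar with the lowering above, here is a minimal standalone C++ sketch (not LLVM code; the struct simply mirrors the RepMovsRepeats helper for illustration) of how a memcpy size is split into a REP MOVS block count plus a tail, and why forcing byte-sized blocks, as the ERMSB and minsize paths do, removes the tail entirely:

#include <cassert>
#include <cstdint>

// Cover SizeVal bytes with blocks of BlockBytes; the tail is always
// smaller than one block, matching the RepMovsRepeats invariant.
struct Repeats {
  uint64_t Count;
  uint64_t BytesLeft;
  Repeats(uint64_t SizeVal, unsigned BlockBytes)
      : Count(SizeVal / BlockBytes), BytesLeft(SizeVal % BlockBytes) {}
};

int main() {
  Repeats QWord(23, 8); // QWORD-aligned path: 2 blocks + 7 tail bytes
  assert(QWord.Count == 2 && QWord.BytesLeft == 7);
  Repeats Byte(23, 1);  // ERMSB / minsize path: byte blocks, no tail to handle
  assert(Byte.Count == 23 && Byte.BytesLeft == 0);
  return 0;
}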
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 92a68759195c..4154530d04e7 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -303,6 +303,7 @@ void X86Subtarget::initializeEnvironment() {
HasFastVectorFSQRT = false;
HasFastLZCNT = false;
HasFastSHLDRotate = false;
+ HasERMSB = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;
PadShortFunctions = false;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index d0d88d326949..fd057f36c890 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -232,6 +232,9 @@ protected:
/// True if SHLD based rotate is fast.
bool HasFastSHLDRotate;
+ /// True if the processor has enhanced REP MOVSB/STOSB.
+ bool HasERMSB;
+
/// True if the short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions;
@@ -472,6 +475,7 @@ public:
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
+ bool hasERMSB() const { return HasERMSB; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 03a1958121ab..623cf38aa951 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -286,7 +286,7 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo());
GISel->RegBankInfo.reset(RBI);
- GISel->InstSelector.reset(createX86InstructionSelector(*I, *RBI));
+ GISel->InstSelector.reset(createX86InstructionSelector(*this, *I, *RBI));
#endif
I->setGISelAccessor(*GISel);
}
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index a752357400b3..784612038c09 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -575,18 +575,17 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const {
assert(RS && "requiresRegisterScavenging failed");
MachineFrameInfo &MFI = MF.getFrameInfo();
- const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
+ const TargetRegisterClass &RC = XCore::GRRegsRegClass;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
// Reserve slots close to SP or frame pointer for Scavenging spills.
// When using SP for small frames, we don't need any scratch registers.
// When using SP for large frames, we may need 2 scratch registers.
// When using FP, for large or small frames, we may need 1 scratch register.
+ unsigned Size = TRI.getSpillSize(RC);
+ unsigned Align = TRI.getSpillAlignment(RC);
if (XFI->isLargeFrame(MF) || hasFP(MF))
- RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
if (XFI->isLargeFrame(MF) && !hasFP(MF))
- RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
}
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 45437815fa37..2efcd46cd8d4 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1605,7 +1605,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLO.ShrinkDemandedConstant(OutVal, DemandedMask) ||
+ if (TLI.ShrinkDemandedConstant(OutVal, DemandedMask, TLO) ||
TLI.SimplifyDemandedBits(OutVal, DemandedMask, KnownZero, KnownOne,
TLO))
DCI.CommitTargetLoweringOpt(TLO);
@@ -1622,7 +1622,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLO.ShrinkDemandedConstant(Time, DemandedMask) ||
+ if (TLI.ShrinkDemandedConstant(Time, DemandedMask, TLO) ||
TLI.SimplifyDemandedBits(Time, DemandedMask, KnownZero, KnownOne,
TLO))
DCI.CommitTargetLoweringOpt(TLO);
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
index e91536ca1e83..75af0e97dfb5 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
@@ -10,6 +10,7 @@
#include "XCoreMachineFunctionInfo.h"
#include "XCoreInstrInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -35,13 +36,15 @@ int XCoreFunctionInfo::createLRSpillSlot(MachineFunction &MF) {
if (LRSpillSlotSet) {
return LRSpillSlot;
}
- const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
+ const TargetRegisterClass &RC = XCore::GRRegsRegClass;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();
if (! MF.getFunction()->isVarArg()) {
// A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
- LRSpillSlot = MFI.CreateFixedObject(RC->getSize(), 0, true);
+ LRSpillSlot = MFI.CreateFixedObject(TRI.getSpillSize(RC), 0, true);
} else {
- LRSpillSlot = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true);
+ LRSpillSlot = MFI.CreateStackObject(TRI.getSpillSize(RC),
+ TRI.getSpillAlignment(RC), true);
}
LRSpillSlotSet = true;
return LRSpillSlot;
@@ -51,9 +54,11 @@ int XCoreFunctionInfo::createFPSpillSlot(MachineFunction &MF) {
if (FPSpillSlotSet) {
return FPSpillSlot;
}
- const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
+ const TargetRegisterClass &RC = XCore::GRRegsRegClass;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();
- FPSpillSlot = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true);
+ FPSpillSlot = MFI.CreateStackObject(TRI.getSpillSize(RC),
+ TRI.getSpillAlignment(RC), true);
FPSpillSlotSet = true;
return FPSpillSlot;
}
@@ -62,10 +67,13 @@ const int* XCoreFunctionInfo::createEHSpillSlot(MachineFunction &MF) {
if (EHSpillSlotSet) {
return EHSpillSlot;
}
- const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
+ const TargetRegisterClass &RC = XCore::GRRegsRegClass;
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();
- EHSpillSlot[0] = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true);
- EHSpillSlot[1] = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true);
+ unsigned Size = TRI.getSpillSize(RC);
+ unsigned Align = TRI.getSpillAlignment(RC);
+ EHSpillSlot[0] = MFI.CreateStackObject(Size, Align, true);
+ EHSpillSlot[1] = MFI.CreateStackObject(Size, Align, true);
EHSpillSlotSet = true;
return EHSpillSlot;
}
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index a2f6e5639d9d..78e71c18fe29 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -17,7 +17,9 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -27,10 +29,21 @@
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;
-#define DEBUG_TYPE "partialinlining"
+#define DEBUG_TYPE "partial-inlining"
STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+// Command line option to disable partial-inlining. The default is false:
+static cl::opt<bool>
+ DisablePartialInlining("disable-partial-inlining", cl::init(false),
cl::Hidden, cl::desc("Disable partial inlining"));
+
+// Command line option to set the maximum number of partial inlinings allowed
+// for the module. The default value of -1 means no limit.
+static cl::opt<int> MaxNumPartialInlining(
+ "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Max number of partial inlinings. The default is unlimited"));
+
namespace {
struct PartialInlinerImpl {
PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {}
@@ -39,6 +52,12 @@ struct PartialInlinerImpl {
private:
InlineFunctionInfo IFI;
+ int NumPartialInlining = 0;
+
+ bool IsLimitReached() {
+ return (MaxNumPartialInlining != -1 &&
+ NumPartialInlining >= MaxNumPartialInlining);
+ }
};
struct PartialInlinerLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
@@ -66,6 +85,9 @@ struct PartialInlinerLegacyPass : public ModulePass {
Function *PartialInlinerImpl::unswitchFunction(Function *F) {
// First, verify that this function is an unswitching candidate...
+ if (F->hasAddressTaken())
+ return nullptr;
+
BasicBlock *EntryBlock = &F->front();
BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
if (!BR || BR->isUnconditional())
@@ -149,11 +171,29 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
// Inline the top-level if test into all callers.
std::vector<User *> Users(DuplicateFunction->user_begin(),
DuplicateFunction->user_end());
- for (User *User : Users)
+
+ for (User *User : Users) {
+ CallSite CS;
if (CallInst *CI = dyn_cast<CallInst>(User))
- InlineFunction(CI, IFI);
+ CS = CallSite(CI);
else if (InvokeInst *II = dyn_cast<InvokeInst>(User))
- InlineFunction(II, IFI);
+ CS = CallSite(II);
+ else
+ llvm_unreachable("All uses must be calls");
+
+ if (IsLimitReached())
+ continue;
+ NumPartialInlining++;
+
+ OptimizationRemarkEmitter ORE(CS.getCaller());
+ DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+ BasicBlock *Block = CS.getParent();
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block)
+ << ore::NV("Callee", F) << " partially inlined into "
+ << ore::NV("Caller", CS.getCaller()));
+
+ InlineFunction(CS, IFI);
+ }
// Ditch the duplicate, since we're done with it, and rewrite all remaining
// users (function pointers, etc.) back to the original function.
@@ -166,6 +206,9 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {
}
bool PartialInlinerImpl::run(Module &M) {
+ if (DisablePartialInlining)
+ return false;
+
std::vector<Function *> Worklist;
Worklist.reserve(M.size());
for (Function &F : M)
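As background for the remark and limit logic added above, the transformation itself can be pictured in plain C++ terms. This is a hedged conceptual sketch only: the names heavyWork and f_outlined_body are invented, and the real pass operates on LLVM IR via CodeExtractor rather than on source code.

#include <cstdio>

// Stand-ins for the expensive region and its outlined copy.
static void heavyWork(int X) { std::printf("heavy %d\n", X); }
static void f_outlined_body(int X) { heavyWork(X); }

// Before: every caller pays the full call even when the early-out fires.
void f(int X) {
  if (X == 0)   // cheap entry test
    return;
  heavyWork(X); // expensive region
}

// After partial inlining, a caller conceptually looks like this: the entry
// test has been inlined, and only the hot path calls the outlined body.
void callerAfter(int X) {
  if (X != 0)
    f_outlined_body(X);
}

int main() {
  f(0);           // early-out, but still a full call before the transform
  callerAfter(0); // early-out decided inline, no call at all
  callerAfter(3);
  return 0;
}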
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index f11b58d1adc4..0d5910ebbfcc 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -282,6 +282,12 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
}
if (!PGOInstrUse.empty())
MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse));
+ // Indirect call promotion that promotes intra-module targets only.
+ // For ThinLTO this is done earlier due to interactions with globalopt
+ // for imported functions. We don't run this at -O0.
+ if (OptLevel > 0)
+ MPM.add(
+ createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
}
void PassManagerBuilder::addFunctionSimplificationPasses(
legacy::PassManagerBase &MPM) {
@@ -414,11 +420,14 @@ void PassManagerBuilder::populateModulePassManager(
else if (!GlobalExtensions->empty() || !Extensions.empty())
MPM.add(createBarrierNoopPass());
+ addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
+
+ // Rename anon globals to be able to export them in the summary.
+ // This has to be done after we add the extensions to the pass manager
+ // as there could be passes (e.g. Address sanitizer) which introduce
+ // new unnamed globals.
if (PrepareForThinLTO)
- // Rename anon globals to be able to export them in the summary.
MPM.add(createNameAnonGlobalPass());
-
- addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
return;
}
@@ -468,16 +477,10 @@ void PassManagerBuilder::populateModulePassManager(
// For SamplePGO in ThinLTO compile phase, we do not want to do indirect
// call promotion as it will change the CFG too much to make the 2nd
// profile annotation in backend more difficult.
- if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) {
- /// PGO instrumentation is added during the compile phase for ThinLTO, do
- /// not run it a second time
+ // PGO instrumentation is added during the compile phase for ThinLTO, do
+ // not run it a second time
+ if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile)
addPGOInstrPasses(MPM);
- // Indirect call promotion that promotes intra-module targets only.
- // For ThinLTO this is done earlier due to interactions with globalopt
- // for imported functions.
- MPM.add(
- createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
- }
if (EnableNonLTOGlobalsModRef)
// We add a module alias analysis pass here. In part due to bugs in the
@@ -677,6 +680,11 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createLoopSinkPass());
// Get rid of LCSSA nodes.
MPM.add(createInstructionSimplifierPass());
+
+ // LoopSink (and other loop passes since the last simplifyCFG) might have
+ // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
+ MPM.add(createCFGSimplificationPass());
+
addExtensionsToPM(EP_OptimizerLast, MPM);
}
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index e30a4bafb9b0..030461004f56 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
using namespace PatternMatch;
@@ -794,6 +795,11 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
if (Opnd->isConstant())
continue;
+ // The constant check above is really for a few special constant
+ // coefficients.
+ if (isa<UndefValue>(Opnd->getSymVal()))
+ continue;
+
const FAddendCoef &CE = Opnd->getCoef();
if (CE.isMinusOne() || CE.isMinusTwo())
NegOpndNum++;
@@ -894,24 +900,22 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
- APInt LHSKnownZero(BitWidth, 0);
- APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
+ KnownBits LHSKnown(BitWidth);
+ computeKnownBits(LHS, LHSKnown, 0, &CxtI);
- APInt RHSKnownZero(BitWidth, 0);
- APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(RHS, RHSKnown, 0, &CxtI);
// Addition of two 2's complement numbers having opposite signs will never
// overflow.
- if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) ||
- (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1]))
+ if ((LHSKnown.One[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1]) ||
+ (LHSKnown.Zero[BitWidth - 1] && RHSKnown.One[BitWidth - 1]))
return true;
// Check if carry bit of addition will not cause overflow.
- if (checkRippleForAdd(LHSKnownZero, RHSKnownZero))
+ if (checkRippleForAdd(LHSKnown.Zero, RHSKnown.Zero))
return true;
- if (checkRippleForAdd(RHSKnownZero, LHSKnownZero))
+ if (checkRippleForAdd(RHSKnown.Zero, LHSKnown.Zero))
return true;
return false;
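The opposite-sign rule used above can be sanity-checked exhaustively at a small bit width; this is an illustrative check, not part of the patch:

#include <cassert>

int main() {
  // If the sign bits differ, an 8-bit addition can never leave [-128, 127]:
  // one operand is at most 127 and the other is negative.
  for (int A = 0; A <= 127; ++A)
    for (int B = -128; B < 0; ++B)
      assert(A + B >= -128 && A + B <= 127);
  return 0;
}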
@@ -931,18 +935,16 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
return true;
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
- APInt LHSKnownZero(BitWidth, 0);
- APInt LHSKnownOne(BitWidth, 0);
- computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, 0, &CxtI);
+ KnownBits LHSKnown(BitWidth);
+ computeKnownBits(LHS, LHSKnown, 0, &CxtI);
- APInt RHSKnownZero(BitWidth, 0);
- APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(RHS, RHSKnown, 0, &CxtI);
// Subtraction of two 2's complement numbers having identical signs will
// never overflow.
- if ((LHSKnownOne[BitWidth - 1] && RHSKnownOne[BitWidth - 1]) ||
- (LHSKnownZero[BitWidth - 1] && RHSKnownZero[BitWidth - 1]))
+ if ((LHSKnown.One[BitWidth - 1] && RHSKnown.One[BitWidth - 1]) ||
+ (LHSKnown.Zero[BitWidth - 1] && RHSKnown.Zero[BitWidth - 1]))
return true;
// TODO: implement logic similar to checkRippleForAdd
@@ -1113,10 +1115,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// a sub and fuse this add with it.
if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) {
IntegerType *IT = cast<IntegerType>(I.getType());
- APInt LHSKnownOne(IT->getBitWidth(), 0);
- APInt LHSKnownZero(IT->getBitWidth(), 0);
- computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne, 0, &I);
- if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue())
+ KnownBits LHSKnown(IT->getBitWidth());
+ computeKnownBits(XorLHS, LHSKnown, 0, &I);
+ if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue())
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
}
@@ -1385,39 +1386,58 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// integer add followed by a promotion.
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
Value *LHSIntVal = LHSConv->getOperand(0);
+ Type *FPType = LHSConv->getType();
+
+ // TODO: This check is overly conservative. In many cases known bits
+ // analysis can tell us that the result of the addition has fewer significant
+ // bits than the integer type can hold.
+ auto IsValidPromotion = [](Type *FTy, Type *ITy) {
+ Type *FScalarTy = FTy->getScalarType();
+ Type *IScalarTy = ITy->getScalarType();
+
+ // Do we have enough bits in the significand to represent the result of
+ // the integer addition?
+ unsigned MaxRepresentableBits =
+ APFloat::semanticsPrecision(FScalarTy->getFltSemantics());
+ return IScalarTy->getIntegerBitWidth() <= MaxRepresentableBits;
+ };
// (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
// ... if the constant fits in the integer value. This is useful for things
// like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
// requires a constant pool load, and generally allows the add to be better
// instcombined.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
- Constant *CI =
- ConstantExpr::getFPToSI(CFP, LHSIntVal->getType());
- if (LHSConv->hasOneUse() &&
- ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSIntVal, CI, I)) {
- // Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
- CI, "addconv");
- return new SIToFPInst(NewAdd, I.getType());
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS))
+ if (IsValidPromotion(FPType, LHSIntVal->getType())) {
+ Constant *CI =
+ ConstantExpr::getFPToSI(CFP, LHSIntVal->getType());
+ if (LHSConv->hasOneUse() &&
+ ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
+ WillNotOverflowSignedAdd(LHSIntVal, CI, I)) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
+ CI, "addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
}
- }
// (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
Value *RHSIntVal = RHSConv->getOperand(0);
-
- // Only do this if x/y have the same type, if at least one of them has a
- // single use (so we don't increase the number of int->fp conversions),
- // and if the integer add will not overflow.
- if (LHSIntVal->getType() == RHSIntVal->getType() &&
- (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
- // Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
- RHSIntVal, "addconv");
- return new SIToFPInst(NewAdd, I.getType());
+ // It's enough to check LHS types only because we require int types to
+ // be the same for this transform.
+ if (IsValidPromotion(FPType, LHSIntVal->getType())) {
+ // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of int->fp conversions),
+ // and if the integer add will not overflow.
+ if (LHSIntVal->getType() == RHSIntVal->getType() &&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
+ RHSIntVal, "addconv");
+ return new SIToFPInst(NewAdd, I.getType());
+ }
}
}
}
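The IsValidPromotion bound introduced above can be checked with ordinary C++: on IEEE-754 targets a double carries a 53-bit significand, so a 32-bit integer add survives the round trip through sitofp/fptosi while a 64-bit one need not. DBL_MANT_DIG below is the host's value, assumed to be 53 for this sketch.

#include <cfloat>
#include <cstdio>

int main() {
  // APFloat::semanticsPrecision(IEEEdouble) corresponds to DBL_MANT_DIG.
  std::printf("i32 add promotable through double: %s\n",
              32 <= DBL_MANT_DIG ? "yes" : "no"); // yes
  std::printf("i64 add promotable through double: %s\n",
              64 <= DBL_MANT_DIG ? "yes" : "no"); // no
  return 0;
}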
@@ -1617,10 +1637,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
// zero.
if (Op0C->isMask()) {
- APInt RHSKnownZero(BitWidth, 0);
- APInt RHSKnownOne(BitWidth, 0);
- computeKnownBits(Op1, RHSKnownZero, RHSKnownOne, 0, &I);
- if ((*Op0C | RHSKnownZero).isAllOnesValue())
+ KnownBits RHSKnown(BitWidth);
+ computeKnownBits(Op1, RHSKnown, 0, &I);
+ if ((*Op0C | RHSKnown.Zero).isAllOnesValue())
return BinaryOperator::CreateXor(Op1, Op0);
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3a98e8937bda..a97b5a9ec0bb 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -906,15 +906,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
switch (PredL) {
default:
llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
- case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
- case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
- return LHS;
- }
case ICmpInst::ICMP_NE:
switch (PredR) {
default:
@@ -930,43 +921,15 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13
return Builder->CreateICmpSLT(LHS0, LHSC);
break; // (X != 13 & X s< 15) -> no change
- case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
- case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
- case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
- return RHS;
case ICmpInst::ICMP_NE:
// Potential folds for this case should already be handled.
break;
}
break;
- case ICmpInst::ICMP_ULT:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
- case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
- case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
- case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
- return LHS;
- }
- break;
- case ICmpInst::ICMP_SLT:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
- case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
- return LHS;
- }
- break;
case ICmpInst::ICMP_UGT:
switch (PredR) {
default:
llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
- case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
- return RHS;
case ICmpInst::ICMP_NE:
if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14
return Builder->CreateICmp(PredL, LHS0, RHSC);
@@ -980,9 +943,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
switch (PredR) {
default:
llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
- case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
- return RHS;
case ICmpInst::ICMP_NE:
if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14
return Builder->CreateICmp(PredL, LHS0, RHSC);
@@ -1234,6 +1194,56 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) {
return nullptr;
}
+static Instruction *foldAndToXor(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert(I.getOpcode() == Instruction::And);
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Value *A, *B;
+
+ // Operand complexity canonicalization guarantees that the 'or' is Op0.
+ // (A | B) & ~(A & B) --> A ^ B
+ // (A | B) & ~(B & A) --> A ^ B
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B)))))
+ return BinaryOperator::CreateXor(A, B);
+
+ // (A | ~B) & (~A | B) --> ~(A ^ B)
+ // (A | ~B) & (B | ~A) --> ~(A ^ B)
+ // (~B | A) & (~A | B) --> ~(A ^ B)
+ // (~B | A) & (B | ~A) --> ~(A ^ B)
+ if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Value(B))))
+ return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+
+ return nullptr;
+}
+
+static Instruction *foldOrToXor(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert(I.getOpcode() == Instruction::Or);
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Value *A, *B;
+
+ // Operand complexity canonicalization guarantees that the 'and' is Op0.
+ // (A & B) | ~(A | B) --> ~(A ^ B)
+ // (A & B) | ~(B | A) --> ~(A ^ B)
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
+ return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
+
+ // (A & ~B) | (~A & B) --> A ^ B
+ // (A & ~B) | (B & ~A) --> A ^ B
+ // (~B & A) | (~A & B) --> A ^ B
+ // (~B & A) | (B & ~A) --> A ^ B
+ if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B))))
+ return BinaryOperator::CreateXor(A, B);
+
+ return nullptr;
+}
+
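The identities encoded in foldAndToXor and foldOrToXor above are easy to verify exhaustively over one-bit values. In this standalone check, ! models the width-1 bitwise complement written as ~ in the patterns:

#include <cassert>

int main() {
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B) {
      assert((((A | B) & ~(A & B)) & 1) == (A ^ B));   // (A|B) & ~(A&B) --> A^B
      assert((((A | !B) & (!A | B)) & 1) == !(A ^ B)); // (A|~B) & (~A|B) --> ~(A^B)
      assert((((A & B) | !(A | B)) & 1) == !(A ^ B));  // (A&B) | ~(A|B) --> ~(A^B)
      assert((((A & !B) | (!A & B)) & 1) == (A ^ B));  // (A&~B) | (~A&B) --> A^B
    }
  return 0;
}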
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -1247,15 +1257,19 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
- // (A|B)&(A|C) -> A|(B&C) etc
- if (Value *V = SimplifyUsingDistributiveLaws(I))
- return replaceInstUsesWith(I, V);
-
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
+ // Do this before using distributive laws to catch simple and/or/not patterns.
+ if (Instruction *Xor = foldAndToXor(I, *Builder))
+ return Xor;
+
+ // (A|B)&(A|C) -> A|(B&C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return replaceInstUsesWith(I, V);
+
if (Value *V = SimplifyBSwap(I))
return replaceInstUsesWith(I, V);
@@ -1366,19 +1380,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return DeMorgan;
{
- Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
- // (A|B) & ~(A&B) -> A^B
- if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
- ((A == C && B == D) || (A == D && B == C)))
- return BinaryOperator::CreateXor(A, B);
-
- // ~(A&B) & (A|B) -> A^B
- if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
- match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
- ((A == C && B == D) || (A == D && B == C)))
- return BinaryOperator::CreateXor(A, B);
-
+ Value *A = nullptr, *B = nullptr, *C = nullptr;
// A&(A^B) => A & ~B
{
Value *tmpOp0 = Op0;
@@ -1405,11 +1407,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
// (A&((~A)|B)) -> A&B
- if (match(Op0, m_Or(m_Not(m_Specific(Op1)), m_Value(A))) ||
- match(Op0, m_Or(m_Value(A), m_Not(m_Specific(Op1)))))
+ if (match(Op0, m_c_Or(m_Not(m_Specific(Op1)), m_Value(A))))
return BinaryOperator::CreateAnd(A, Op1);
- if (match(Op1, m_Or(m_Not(m_Specific(Op0)), m_Value(A))) ||
- match(Op1, m_Or(m_Value(A), m_Not(m_Specific(Op0)))))
+ if (match(Op1, m_c_Or(m_Not(m_Specific(Op0)), m_Value(A))))
return BinaryOperator::CreateAnd(A, Op0);
// (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
@@ -1425,13 +1425,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C));
// (A | B) & ((~A) ^ B) -> (A & B)
- if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1, m_Xor(m_Not(m_Specific(A)), m_Specific(B))))
+ // (A | B) & (B ^ (~A)) -> (A & B)
+ // (B | A) & ((~A) ^ B) -> (A & B)
+ // (B | A) & (B ^ (~A)) -> (A & B)
+ if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateAnd(A, B);
// ((~A) ^ B) & (A | B) -> (A & B)
// ((~A) ^ B) & (B | A) -> (A & B)
- if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) &&
+ // (B ^ (~A)) & (A | B) -> (A & B)
+ // (B ^ (~A)) & (B | A) -> (A & B)
+ if (match(Op0, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateAnd(A, B);
}
@@ -2037,15 +2042,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
- // (A&B)|(A&C) -> A&(B|C) etc
- if (Value *V = SimplifyUsingDistributiveLaws(I))
- return replaceInstUsesWith(I, V);
-
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
+ // Do this before using distributive laws to catch simple and/or/not patterns.
+ if (Instruction *Xor = foldOrToXor(I, *Builder))
+ return Xor;
+
+ // (A&B)|(A&C) -> A&(B|C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return replaceInstUsesWith(I, V);
+
if (Value *V = SimplifyBSwap(I))
return replaceInstUsesWith(I, V);
@@ -2105,19 +2114,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
match(Op0, m_c_And(m_Specific(A), m_Value(B))))
return BinaryOperator::CreateOr(Op1, B);
- // (A & ~B) | (A ^ B) -> (A ^ B)
- // (~B & A) | (A ^ B) -> (A ^ B)
- if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_Xor(m_Specific(A), m_Specific(B))))
- return BinaryOperator::CreateXor(A, B);
-
- // Commute the 'or' operands.
- // (A ^ B) | (A & ~B) -> (A ^ B)
- // (A ^ B) | (~B & A) -> (A ^ B)
- if (match(Op1, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op0, m_Xor(m_Specific(A), m_Specific(B))))
- return BinaryOperator::CreateXor(A, B);
-
// (A & C)|(B & D)
Value *C = nullptr, *D = nullptr;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
@@ -2182,23 +2178,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
}
- // ((A&~B)|(~A&B)) -> A^B
- if ((match(C, m_Not(m_Specific(D))) &&
- match(B, m_Not(m_Specific(A)))))
- return BinaryOperator::CreateXor(A, D);
- // ((~B&A)|(~A&B)) -> A^B
- if ((match(A, m_Not(m_Specific(D))) &&
- match(B, m_Not(m_Specific(C)))))
- return BinaryOperator::CreateXor(C, D);
- // ((A&~B)|(B&~A)) -> A^B
- if ((match(C, m_Not(m_Specific(B))) &&
- match(D, m_Not(m_Specific(A)))))
- return BinaryOperator::CreateXor(A, B);
- // ((~B&A)|(B&~A)) -> A^B
- if ((match(A, m_Not(m_Specific(B))) &&
- match(D, m_Not(m_Specific(C)))))
- return BinaryOperator::CreateXor(C, B);
-
// ((A|B)&1)|(B&-2) -> (A&1) | B
if (match(A, m_Or(m_Value(V1), m_Specific(B))) ||
match(A, m_Or(m_Specific(B), m_Value(V1)))) {
@@ -2374,6 +2353,58 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
+/// A ^ B can be specified using other logic ops in a variety of patterns. We
+/// can fold these early and efficiently by morphing an existing instruction.
+static Instruction *foldXorToXor(BinaryOperator &I) {
+ assert(I.getOpcode() == Instruction::Xor);
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Value *A, *B;
+
+ // There are 4 commuted variants for each of the basic patterns.
+
+ // (A & B) ^ (A | B) -> A ^ B
+ // (A & B) ^ (B | A) -> A ^ B
+ // (A | B) ^ (A & B) -> A ^ B
+ // (A | B) ^ (B & A) -> A ^ B
+ if ((match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_Or(m_Specific(A), m_Specific(B)))) ||
+ (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_And(m_Specific(A), m_Specific(B))))) {
+ I.setOperand(0, A);
+ I.setOperand(1, B);
+ return &I;
+ }
+
+ // (A | ~B) ^ (~A | B) -> A ^ B
+ // (~B | A) ^ (~A | B) -> A ^ B
+ // (~A | B) ^ (A | ~B) -> A ^ B
+ // (B | ~A) ^ (A | ~B) -> A ^ B
+ if ((match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) ||
+ (match(Op0, m_c_Or(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B)))))) {
+ I.setOperand(0, A);
+ I.setOperand(1, B);
+ return &I;
+ }
+
+ // (A & ~B) ^ (~A & B) -> A ^ B
+ // (~B & A) ^ (~A & B) -> A ^ B
+ // (~A & B) ^ (A & ~B) -> A ^ B
+ // (B & ~A) ^ (A & ~B) -> A ^ B
+ if ((match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B)))) ||
+ (match(Op0, m_c_And(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B)))))) {
+ I.setOperand(0, A);
+ I.setOperand(1, B);
+ return &I;
+ }
+
+ return nullptr;
+}
+
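The three pattern families handled by foldXorToXor can be verified the same way, complementing the one-bit check given after foldOrToXor above (again, ! stands in for the patterns' ~ at width 1):

#include <cassert>

int main() {
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B) {
      assert((((A & B) ^ (A | B)) & 1) == (A ^ B));   // (A&B) ^ (A|B) --> A^B
      assert((((A | !B) ^ (!A | B)) & 1) == (A ^ B)); // (A|~B) ^ (~A|B) --> A^B
      assert((((A & !B) ^ (!A & B)) & 1) == (A ^ B)); // (A&~B) ^ (~A&B) --> A^B
    }
  return 0;
}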
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -2387,6 +2418,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
+ if (Instruction *NewXor = foldXorToXor(I))
+ return NewXor;
+
// (A&B)^(A&C) -> A&(B^C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
@@ -2399,44 +2433,39 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyBSwap(I))
return replaceInstUsesWith(I, V);
- // Is this a ~ operation?
- if (Value *NotOp = dyn_castNotVal(&I)) {
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
- if (Op0I->getOpcode() == Instruction::And ||
- Op0I->getOpcode() == Instruction::Or) {
- // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
- // ~(~X | Y) === (X & ~Y) - De Morgan's Law
- if (dyn_castNotVal(Op0I->getOperand(1)))
- Op0I->swapOperands();
- if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) {
- Value *NotY =
- Builder->CreateNot(Op0I->getOperand(1),
- Op0I->getOperand(1)->getName()+".not");
- if (Op0I->getOpcode() == Instruction::And)
- return BinaryOperator::CreateOr(Op0NotVal, NotY);
- return BinaryOperator::CreateAnd(Op0NotVal, NotY);
- }
-
- // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
- // ~(X | Y) === (~X & ~Y) - De Morgan's Law
- if (IsFreeToInvert(Op0I->getOperand(0),
- Op0I->getOperand(0)->hasOneUse()) &&
- IsFreeToInvert(Op0I->getOperand(1),
- Op0I->getOperand(1)->hasOneUse())) {
- Value *NotX =
- Builder->CreateNot(Op0I->getOperand(0), "notlhs");
- Value *NotY =
- Builder->CreateNot(Op0I->getOperand(1), "notrhs");
- if (Op0I->getOpcode() == Instruction::And)
- return BinaryOperator::CreateOr(NotX, NotY);
- return BinaryOperator::CreateAnd(NotX, NotY);
- }
+ // Is this a 'not' (~) fed by a binary operator?
+ BinaryOperator *NotOp;
+ if (match(&I, m_Not(m_BinOp(NotOp)))) {
+ if (NotOp->getOpcode() == Instruction::And ||
+ NotOp->getOpcode() == Instruction::Or) {
+ // ~(~X & Y) --> (X | ~Y) - De Morgan's Law
+ // ~(~X | Y) === (X & ~Y) - De Morgan's Law
+ if (dyn_castNotVal(NotOp->getOperand(1)))
+ NotOp->swapOperands();
+ if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0))) {
+ Value *NotY = Builder->CreateNot(
+ NotOp->getOperand(1), NotOp->getOperand(1)->getName() + ".not");
+ if (NotOp->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(Op0NotVal, NotY);
+ return BinaryOperator::CreateAnd(Op0NotVal, NotY);
+ }
- } else if (Op0I->getOpcode() == Instruction::AShr) {
- // ~(~X >>s Y) --> (X >>s Y)
- if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0)))
- return BinaryOperator::CreateAShr(Op0NotVal, Op0I->getOperand(1));
+ // ~(X & Y) --> (~X | ~Y) - De Morgan's Law
+ // ~(X | Y) === (~X & ~Y) - De Morgan's Law
+ if (IsFreeToInvert(NotOp->getOperand(0),
+ NotOp->getOperand(0)->hasOneUse()) &&
+ IsFreeToInvert(NotOp->getOperand(1),
+ NotOp->getOperand(1)->hasOneUse())) {
+ Value *NotX = Builder->CreateNot(NotOp->getOperand(0), "notlhs");
+ Value *NotY = Builder->CreateNot(NotOp->getOperand(1), "notrhs");
+ if (NotOp->getOpcode() == Instruction::And)
+ return BinaryOperator::CreateOr(NotX, NotY);
+ return BinaryOperator::CreateAnd(NotX, NotY);
}
+ } else if (NotOp->getOpcode() == Instruction::AShr) {
+ // ~(~X >>s Y) --> (X >>s Y)
+ if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0)))
+ return BinaryOperator::CreateAShr(Op0NotVal, NotOp->getOperand(1));
}
}
@@ -2574,40 +2603,6 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
{
Value *A, *B, *C, *D;
- // (A & B)^(A | B) -> A ^ B
- if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
- match(Op1, m_Or(m_Value(C), m_Value(D)))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- // (A | B)^(A & B) -> A ^ B
- if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1, m_And(m_Value(C), m_Value(D)))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- // (A | ~B) ^ (~A | B) -> A ^ B
- // (~B | A) ^ (~A | B) -> A ^ B
- if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B))))
- return BinaryOperator::CreateXor(A, B);
-
- // (~A | B) ^ (A | ~B) -> A ^ B
- if (match(Op0, m_Or(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B))))) {
- return BinaryOperator::CreateXor(A, B);
- }
- // (A & ~B) ^ (~A & B) -> A ^ B
- // (~B & A) ^ (~A & B) -> A ^ B
- if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B))))
- return BinaryOperator::CreateXor(A, B);
-
- // (~A & B) ^ (A & ~B) -> A ^ B
- if (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B))))) {
- return BinaryOperator::CreateXor(A, B);
- }
// (A ^ C)^(A | B) -> ((~A) & B) ^ C
if (match(Op0, m_Xor(m_Value(D), m_Value(C))) &&
match(Op1, m_Or(m_Value(A), m_Value(B)))) {
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e7aa1a457371..313ab13b9e2b 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -44,6 +44,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
@@ -1378,14 +1379,13 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
return nullptr;
unsigned BitWidth = IT->getBitWidth();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- IC.computeKnownBits(Op0, KnownZero, KnownOne, 0, &II);
+ KnownBits Known(BitWidth);
+ IC.computeKnownBits(Op0, Known, 0, &II);
// Create a mask for bits above (ctlz) or below (cttz) the first known one.
bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
- unsigned NumMaskBits = IsTZ ? KnownOne.countTrailingZeros()
- : KnownOne.countLeadingZeros();
+ unsigned NumMaskBits = IsTZ ? Known.One.countTrailingZeros()
+ : Known.One.countLeadingZeros();
APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits)
: APInt::getHighBitsSet(BitWidth, NumMaskBits);
@@ -1393,7 +1393,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
// zero, this value is constant.
// FIXME: This should be in InstSimplify because we're replacing an
// instruction with a constant.
- if ((Mask & KnownZero) == Mask) {
+ if (Mask.isSubsetOf(Known.Zero)) {
auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits));
return IC.replaceInstUsesWith(II, C);
}
@@ -1401,7 +1401,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
// If the input to cttz/ctlz is known to be non-zero,
// then change the 'ZeroIsUndef' parameter to 'true'
// because we know the zero behavior can't affect the result.
- if (KnownOne != 0 || isKnownNonZero(Op0, IC.getDataLayout())) {
+ if (Known.One != 0 || isKnownNonZero(Op0, IC.getDataLayout())) {
if (!match(II.getArgOperand(1), m_One())) {
II.setOperand(1, IC.Builder->getTrue());
return &II;
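A concrete instance of the masking logic above, using plain integers rather than the KnownBits API; the counting builtin here is the GCC/Clang __builtin_ctz, chosen only for the illustration:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t KnownOne  = 0x8; // bit 3 is known to be set
  uint32_t KnownZero = 0x7; // bits 0..2 are known to be clear
  unsigned NumMaskBits = __builtin_ctz(KnownOne); // 3 bits below the first one
  uint32_t Mask = (1u << NumMaskBits) - 1;        // 0x7
  // Mask.isSubsetOf(Known.Zero) in the patch: every masked bit is known zero,
  // so cttz of this value folds to the constant NumMaskBits.
  assert((Mask & ~KnownZero) == 0 && NumMaskBits == 3);
  return 0;
}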
@@ -3432,8 +3432,26 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
- return replaceInstUsesWith(*II,
- ConstantExpr::getSExt(CCmp, II->getType()));
+ if (CCmp->isNullValue()) {
+ return replaceInstUsesWith(
+ *II, ConstantExpr::getSExt(CCmp, II->getType()));
+ }
+
+ // The result of V_ICMP/V_FCMP assembly instructions (which this
+ // intrinsic exposes) is one bit per thread, masked with the EXEC
+ // register (which contains the bitmask of live threads). So a
+ // comparison that always returns true is the same as a read of the
+ // EXEC register.
+ Value *NewF = Intrinsic::getDeclaration(
+ II->getModule(), Intrinsic::read_register, II->getType());
+ Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
+ MDNode *MD = MDNode::get(II->getContext(), MDArgs);
+ Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
+ CallInst *NewCall = Builder->CreateCall(NewF, Args);
+ NewCall->addAttribute(AttributeList::FunctionIndex,
+ Attribute::Convergent);
+ NewCall->takeName(II);
+ return replaceInstUsesWith(*II, NewCall);
}
// Canonicalize constants to RHS.
@@ -3599,9 +3617,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// If there is a dominating assume with the same condition as this one,
// then this one is redundant, and should be removed.
- APInt KnownZero(1, 0), KnownOne(1, 0);
- computeKnownBits(IIOperand, KnownZero, KnownOne, 0, II);
- if (KnownOne.isAllOnesValue())
+ KnownBits Known(1);
+ computeKnownBits(IIOperand, Known, 0, II);
+ if (Known.One.isAllOnesValue())
return eraseInstFromFunction(*II);
// Update the cache of affected values for this assumption (we might be
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 9127ddca5915..312d9baae43a 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,9 +14,10 @@
#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
using namespace PatternMatch;
@@ -676,11 +677,10 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
// This only works for EQ and NE
ICI->isEquality()) {
// If Op1C some other power of two, convert:
- uint32_t BitWidth = Op1C->getType()->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne, 0, &CI);
+ KnownBits Known(Op1C->getType()->getBitWidth());
+ computeKnownBits(ICI->getOperand(0), Known, 0, &CI);
- APInt KnownZeroMask(~KnownZero);
+ APInt KnownZeroMask(~Known.Zero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
if (!DoTransform) return ICI;
@@ -726,13 +726,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
- APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
- APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
- computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS, 0, &CI);
- computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS, 0, &CI);
+ KnownBits KnownLHS(BitWidth);
+ KnownBits KnownRHS(BitWidth);
+ computeKnownBits(LHS, KnownLHS, 0, &CI);
+ computeKnownBits(RHS, KnownRHS, 0, &CI);
- if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
- APInt KnownBits = KnownZeroLHS | KnownOneLHS;
+ if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
+ APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
APInt UnknownBit = ~KnownBits;
if (UnknownBit.countPopulation() == 1) {
if (!DoTransform) return ICI;
@@ -740,7 +740,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
Value *Result = Builder->CreateXor(LHS, RHS);
// Mask off any bits that are set and won't be shifted away.
- if (KnownOneLHS.uge(UnknownBit))
+ if (KnownLHS.One.uge(UnknownBit))
Result = Builder->CreateAnd(Result,
ConstantInt::get(ITy, UnknownBit));
@@ -1049,10 +1049,10 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
if (ICI->hasOneUse() &&
ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
unsigned BitWidth = Op1C->getType()->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(Op0, KnownZero, KnownOne, 0, &CI);
+ KnownBits Known(BitWidth);
+ computeKnownBits(Op0, Known, 0, &CI);
- APInt KnownZeroMask(~KnownZero);
+ APInt KnownZeroMask(~Known.Zero);
if (KnownZeroMask.isPowerOf2()) {
Value *In = ICI->getOperand(0);
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 003029ae39d5..d846a631b96f 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
using namespace PatternMatch;
@@ -175,19 +176,18 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
/// Given a signed integer type and a set of known zero and one bits, compute
/// the maximum and minimum values that could have the specified known zero and
/// known one bits, returning them in Min/Max.
-static void computeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
- const APInt &KnownOne,
+/// TODO: Move to method on KnownBits struct?
+static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known,
APInt &Min, APInt &Max) {
- assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
- KnownZero.getBitWidth() == Min.getBitWidth() &&
- KnownZero.getBitWidth() == Max.getBitWidth() &&
+ assert(Known.getBitWidth() == Min.getBitWidth() &&
+ Known.getBitWidth() == Max.getBitWidth() &&
"KnownZero, KnownOne and Min, Max must have equal bitwidth.");
- APInt UnknownBits = ~(KnownZero|KnownOne);
+ APInt UnknownBits = ~(Known.Zero|Known.One);
// The minimum value is when all unknown bits are zeros, EXCEPT for the sign
// bit if it is unknown.
- Min = KnownOne;
- Max = KnownOne|UnknownBits;
+ Min = Known.One;
+ Max = Known.One|UnknownBits;
if (UnknownBits.isNegative()) { // Sign bit is unknown
Min.setBit(Min.getBitWidth()-1);
@@ -198,19 +198,18 @@ static void computeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
/// Given an unsigned integer type and a set of known zero and one bits, compute
/// the maximum and minimum values that could have the specified known zero and
/// known one bits, returning them in Min/Max.
-static void computeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
- const APInt &KnownOne,
+/// TODO: Move to method on KnownBits struct?
+static void computeUnsignedMinMaxValuesFromKnownBits(const KnownBits &Known,
APInt &Min, APInt &Max) {
- assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
- KnownZero.getBitWidth() == Min.getBitWidth() &&
- KnownZero.getBitWidth() == Max.getBitWidth() &&
+ assert(Known.getBitWidth() == Min.getBitWidth() &&
+ Known.getBitWidth() == Max.getBitWidth() &&
"Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
- APInt UnknownBits = ~(KnownZero|KnownOne);
+ APInt UnknownBits = ~(Known.Zero|Known.One);
// The minimum value is when the unknown bits are all zeros.
- Min = KnownOne;
+ Min = Known.One;
// The maximum value is when the unknown bits are all ones.
- Max = KnownOne|UnknownBits;
+ Max = Known.One|UnknownBits;
}
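A worked 4-bit example of the two helpers above, with values chosen purely for illustration: with bit 0 known one, bit 2 known zero, and bits 1 and 3 unknown, the unsigned range is [1, 11] and the signed range is [-7, 3].

#include <cassert>

int main() {
  const unsigned One = 0b0001, Zero = 0b0100;
  const unsigned Unknown = 0b1111 & ~(One | Zero);  // 0b1010
  // Unsigned: the min sets all unknown bits to 0, the max sets them to 1.
  assert(One == 1 && (One | Unknown) == 0b1011);    // [1, 11]
  // Signed: the unknown sign bit is 1 in the minimum and 0 in the maximum.
  const int SMin = (int)(One | 0b1000) - 16;        // 4-bit 0b1001 == -7
  const int SMax = (int)((One | Unknown) & 0b0111); // 0b0011 == 3
  assert(SMin == -7 && SMax == 3);
  return 0;
}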
/// This is called when we see this pattern:
@@ -1479,14 +1478,14 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp,
// of the high bits truncated out of x are known.
unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
SrcBits = X->getType()->getScalarSizeInBits();
- APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
- computeKnownBits(X, KnownZero, KnownOne, 0, &Cmp);
+ KnownBits Known(SrcBits);
+ computeKnownBits(X, Known, 0, &Cmp);
// If all the high bits are known, we can do this xform.
- if ((KnownZero | KnownOne).countLeadingOnes() >= SrcBits - DstBits) {
+ if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) {
// Pull in the high bits from known-ones set.
APInt NewRHS = C->zext(SrcBits);
- NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
+ NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS));
}
}
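To illustrate the widening above with invented numbers: if the high 8 bits of a 16-bit X are all known and equal to 0x12, then the comparison trunc(X) == 0x34 can be rewritten over the full width as X == 0x1234.

#include <cassert>
#include <cstdint>

int main() {
  const uint16_t KnownOneHigh = 0x1200; // high byte of X known to be 0x12
  const uint8_t  C = 0x34;              // original comparison: trunc(X) == 0x34
  const uint16_t NewRHS = (uint16_t)(C | KnownOneHigh);
  assert(NewRHS == 0x1234);             // widened comparison: X == 0x1234
  return 0;
}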
@@ -4001,16 +4000,16 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
IsSignBit = isSignBitCheck(Pred, *CmpC, UnusedBit);
}
- APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
- APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
+ KnownBits Op0Known(BitWidth);
+ KnownBits Op1Known(BitWidth);
if (SimplifyDemandedBits(&I, 0,
getDemandedBitsLHSMask(I, BitWidth, IsSignBit),
- Op0KnownZero, Op0KnownOne, 0))
+ Op0Known, 0))
return &I;
if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth),
- Op1KnownZero, Op1KnownOne, 0))
+ Op1Known, 0))
return &I;
// Given the known and unknown bits, compute a range that the LHS could be
@@ -4019,15 +4018,11 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
if (I.isSigned()) {
- computeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, Op0Min,
- Op0Max);
- computeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, Op1Min,
- Op1Max);
+ computeSignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
+ computeSignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
} else {
- computeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, Op0Min,
- Op0Max);
- computeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, Op1Min,
- Op1Max);
+ computeUnsignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
+ computeUnsignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
}
// If Min and Max are known to be the same, then SimplifyDemandedBits
@@ -4054,8 +4049,8 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
// If all bits are known zero except for one, then we know at most one bit
// is set. If the comparison is against zero, then this is a check to see if
// *that* bit is set.
- APInt Op0KnownZeroInverted = ~Op0KnownZero;
- if (~Op1KnownZero == 0) {
+ APInt Op0KnownZeroInverted = ~Op0Known.Zero;
+ if (~Op1Known.Zero == 0) {
// If the LHS is an AND with the same constant, look through it.
Value *LHS = nullptr;
const APInt *LHSC;
@@ -4193,8 +4188,8 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
// Turn a signed comparison into an unsigned one if both operands are known to
// have the same sign.
if (I.isSigned() &&
- ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
- (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
+ ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) ||
+ (Op0Known.One.isNegative() && Op1Known.One.isNegative())))
return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
return nullptr;
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 71000063ab3c..776686d3d117 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -489,10 +489,9 @@ public:
return nullptr; // Don't do anything with FI
}
- void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ void computeKnownBits(Value *V, KnownBits &Known,
unsigned Depth, Instruction *CxtI) const {
- return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, &AC, CxtI,
- &DT);
+ return llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT);
}
bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0,
@@ -536,24 +535,23 @@ private:
/// \brief Attempts to replace V with a simpler value based on the demanded
/// bits.
- Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth,
- Instruction *CxtI);
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known,
+ unsigned Depth, Instruction *CxtI);
bool SimplifyDemandedBits(Instruction *I, unsigned Op,
- const APInt &DemandedMask, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth = 0);
+ const APInt &DemandedMask, KnownBits &Known,
+ unsigned Depth = 0);
/// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne
/// bits. It also tries to handle simplifications that can be done based on
/// DemandedMask, but without modifying the Instruction.
Value *SimplifyMultipleUseDemandedBits(Instruction *I,
const APInt &DemandedMask,
- APInt &KnownZero, APInt &KnownOne,
+ KnownBits &Known,
unsigned Depth, Instruction *CxtI);
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
/// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence.
- Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
- const APInt &DemandedMask, APInt &KnownZero,
- APInt &KnownOne);
+ Value *simplifyShrShlDemandedBits(
+ Instruction *Shr, const APInt &ShrOp1, Instruction *Shl,
+ const APInt &ShlOp1, const APInt &DemandedMask, KnownBits &Known);
/// \brief Tries to simplify operands to an integer instruction based on its
/// demanded bits.
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 5d6d899da4b5..76829c5e457b 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
using namespace PatternMatch;
@@ -1476,11 +1477,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// The motivation for this call into value tracking is to take advantage of
// the assumption cache, so make sure that is populated.
if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) {
- APInt KnownOne(1, 0), KnownZero(1, 0);
- computeKnownBits(CondVal, KnownZero, KnownOne, 0, &SI);
- if (KnownOne == 1)
+ KnownBits Known(1);
+ computeKnownBits(CondVal, Known, 0, &SI);
+ if (Known.One == 1)
return replaceInstUsesWith(SI, TrueVal);
- if (KnownZero == 1)
+ if (Known.Zero == 1)
return replaceInstUsesWith(SI, FalseVal);
}
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 2ba052b7e02d..8d0ed8532779 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -26,7 +27,7 @@ using namespace llvm::PatternMatch;
/// constant integer. If so, check to see if there are any bits set in the
/// constant that are not demanded. If so, shrink the constant and return true.
static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
- APInt Demanded) {
+ const APInt &Demanded) {
assert(I && "No instruction?");
assert(OpNo < I->getNumOperands() && "Operand index too large");
@@ -37,13 +38,11 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
return false;
// If there are no bits set that aren't demanded, nothing to do.
- Demanded = Demanded.zextOrTrunc(C->getBitWidth());
if (C->isSubsetOf(Demanded))
return false;
// This instruction is producing bits that are not demanded. Shrink the RHS.
- Demanded &= *C;
- I->setOperand(OpNo, ConstantInt::get(Op->getType(), Demanded));
+ I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded));
return true;
}
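
The shrinking step now takes Demanded by const reference and writes back C & Demanded directly instead of truncating and mutating a local copy. The same logic on plain 32-bit masks, as a sketch; shrinkDemandedConstant here is a stand-in for the routine above, not the routine itself.

    #include <cstdint>
    #include <cstdio>

    // Returns true and shrinks C if it has bits set that nobody demands.
    bool shrinkDemandedConstant(uint32_t &C, uint32_t Demanded) {
      if ((C & ~Demanded) == 0)   // C is already a subset of Demanded
        return false;
      C &= Demanded;              // drop the undemanded bits
      return true;
    }

    int main() {
      uint32_t C = 0xFFu;         // e.g. the RHS of 'and x, 0xFF'
      bool Changed = shrinkDemandedConstant(C, /*Demanded=*/0x0Fu);
      std::printf("changed=%d C=0x%02x\n", (int)Changed, C); // changed=1 C=0x0f
      return 0;
    }
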
@@ -54,10 +53,10 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
/// the instruction has any properties that allow us to simplify its operands.
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ KnownBits Known(BitWidth);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
- Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne,
+ Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
0, &Inst);
if (!V) return false;
if (V == &Inst) return true;
@@ -70,11 +69,11 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
/// change and false otherwise.
bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
const APInt &DemandedMask,
- APInt &KnownZero, APInt &KnownOne,
+ KnownBits &Known,
unsigned Depth) {
Use &U = I->getOperandUse(OpNo);
- Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero,
- KnownOne, Depth, I);
+ Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, Known,
+ Depth, I);
if (!NewVal) return false;
U = NewVal;
return true;
@@ -88,15 +87,16 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
/// with a constant or one of its operands. In such cases, this function does
/// the replacement and returns true. In all other cases, it returns false after
/// analyzing the expression and setting KnownOne to the bits known to be one in the
-/// expression. KnownZero contains all the bits that are known to be zero in the
-/// expression. These are provided to potentially allow the caller (which might
-/// recursively be SimplifyDemandedBits itself) to simplify the expression.
-/// KnownOne and KnownZero always follow the invariant that:
-/// KnownOne & KnownZero == 0.
-/// That is, a bit can't be both 1 and 0. Note that the bits in KnownOne and
-/// KnownZero may only be accurate for those bits set in DemandedMask. Note also
-/// that the bitwidth of V, DemandedMask, KnownZero and KnownOne must all be the
-/// same.
+/// expression. Known.Zero contains all the bits that are known to be zero in
+/// the expression. These are provided to potentially allow the caller (which
+/// might recursively be SimplifyDemandedBits itself) to simplify the
+/// expression.
+/// Known.One and Known.Zero always follow the invariant that:
+/// Known.One & Known.Zero == 0.
+/// That is, a bit can't be both 1 and 0. Note that the bits in Known.One and
+/// Known.Zero may only be accurate for those bits set in DemandedMask. Note
+/// also that the bitwidth of V, DemandedMask, Known.Zero and Known.One must all
+/// be the same.
///
/// This returns null if it did not change anything and it permits no
/// simplification. This returns V itself if it did some simplification of V's
@@ -104,8 +104,7 @@ bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
/// some other non-null value if it found out that V is equal to another value
/// in the context where the specified bits are demanded, but not for all users.
Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
- APInt &KnownZero, APInt &KnownOne,
- unsigned Depth,
+ KnownBits &Known, unsigned Depth,
Instruction *CxtI) {
assert(V != nullptr && "Null pointer of Value???");
assert(Depth <= 6 && "Limit Search Depth");
@@ -113,18 +112,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Type *VTy = V->getType();
assert(
(!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
- KnownOne.getBitWidth() == BitWidth &&
- "Value *V, DemandedMask, KnownZero and KnownOne "
- "must have same BitWidth");
+ Known.getBitWidth() == BitWidth &&
+ "Value *V, DemandedMask and Known must have same BitWidth");
if (isa<Constant>(V)) {
- computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
+ computeKnownBits(V, Known, Depth, CxtI);
return nullptr;
}
- KnownZero.clearAllBits();
- KnownOne.clearAllBits();
+ Known.Zero.clearAllBits();
+ Known.One.clearAllBits();
if (DemandedMask == 0) // Not demanding any bits from V.
return UndefValue::get(VTy);
@@ -133,20 +130,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
- computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
+ computeKnownBits(V, Known, Depth, CxtI);
return nullptr; // Only analyze instructions.
}
// If there are multiple uses of this value and we aren't at the root, then
// we can't do any simplifications of the operands, because DemandedMask
// only reflects the bits demanded by *one* of the users.
- if (Depth != 0 && !I->hasOneUse()) {
- return SimplifyMultipleUseDemandedBits(I, DemandedMask, KnownZero, KnownOne,
- Depth, CxtI);
- }
+ if (Depth != 0 && !I->hasOneUse())
+ return SimplifyMultipleUseDemandedBits(I, DemandedMask, Known, Depth, CxtI);
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ KnownBits LHSKnown(BitWidth), RHSKnown(BitWidth);
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedMask to all bits so that we can try to simplify the
@@ -157,22 +151,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
switch (I->getOpcode()) {
default:
- computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
+ computeKnownBits(I, Known, Depth, CxtI);
break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne,
- Depth + 1) ||
- SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownZero, LHSKnownZero,
- LHSKnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown,
+ Depth + 1))
return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
+ assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
// Output known-0 are known to be clear if zero in either the LHS | RHS.
- APInt IKnownZero = RHSKnownZero | LHSKnownZero;
+ APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
// Output known-1 bits are only known if set in both the LHS & RHS.
- APInt IKnownOne = RHSKnownOne & LHSKnownOne;
+ APInt IKnownOne = RHSKnown.One & LHSKnown.One;
// If the client is only demanding bits that we know, return the known
// constant.
@@ -181,33 +174,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
- if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne))
+ if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
return I->getOperand(0);
- if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne))
+ if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
return I->getOperand(1);
// If the RHS is a constant, see if we can simplify it.
- if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
+ if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnown.Zero))
return I;
- KnownZero = std::move(IKnownZero);
- KnownOne = std::move(IKnownOne);
+ Known.Zero = std::move(IKnownZero);
+ Known.One = std::move(IKnownOne);
break;
}
case Instruction::Or: {
// If either the LHS or the RHS are One, the result is One.
- if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne,
- Depth + 1) ||
- SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownOne, LHSKnownZero,
- LHSKnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown,
+ Depth + 1))
return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
+ assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
// Output known-0 bits are only known if clear in both the LHS & RHS.
- APInt IKnownZero = RHSKnownZero & LHSKnownZero;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- APInt IKnownOne = RHSKnownOne | LHSKnownOne;
+ APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ APInt IKnownOne = RHSKnown.One | LHSKnown.One;
// If the client is only demanding bits that we know, return the known
// constant.
@@ -216,34 +208,32 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
- if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero))
+ if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
return I->getOperand(0);
- if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero))
+ if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
return I->getOperand(1);
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(I, 1, DemandedMask))
return I;
- KnownZero = std::move(IKnownZero);
- KnownOne = std::move(IKnownOne);
+ Known.Zero = std::move(IKnownZero);
+ Known.One = std::move(IKnownOne);
break;
}
case Instruction::Xor: {
- if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne,
- Depth + 1) ||
- SimplifyDemandedBits(I, 0, DemandedMask, LHSKnownZero, LHSKnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
+ assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
- (RHSKnownOne & LHSKnownOne);
+ APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
+ (RHSKnown.One & LHSKnown.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
- APInt IKnownOne = (RHSKnownZero & LHSKnownOne) |
- (RHSKnownOne & LHSKnownZero);
+ APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) |
+ (RHSKnown.One & LHSKnown.Zero);
// If the client is only demanding bits that we know, return the known
// constant.
@@ -252,15 +242,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
- if (DemandedMask.isSubsetOf(RHSKnownZero))
+ if (DemandedMask.isSubsetOf(RHSKnown.Zero))
return I->getOperand(0);
- if (DemandedMask.isSubsetOf(LHSKnownZero))
+ if (DemandedMask.isSubsetOf(LHSKnown.Zero))
return I->getOperand(1);
// If all of the demanded bits are known to be zero on one side or the
// other, turn this into an *inclusive* or.
// e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
- if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownZero)) {
+ if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.Zero)) {
Instruction *Or =
BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),
I->getName());
@@ -271,10 +261,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// bits on that side are also known to be set on the other side, turn this
// into an AND, as we know the bits will be cleared.
// e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
- if (DemandedMask.isSubsetOf(RHSKnownZero|RHSKnownOne) &&
- RHSKnownOne.isSubsetOf(LHSKnownOne)) {
+ if (DemandedMask.isSubsetOf(RHSKnown.Zero|RHSKnown.One) &&
+ RHSKnown.One.isSubsetOf(LHSKnown.One)) {
Constant *AndC = Constant::getIntegerValue(VTy,
- ~RHSKnownOne & DemandedMask);
+ ~RHSKnown.One & DemandedMask);
Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC);
return InsertNewInstWith(And, *I);
}
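
The two xor rewrites above rest on a simple bit identity: when no bit can be set in both operands, '^' and '|' agree, so the xor can become an inclusive or. A brute-force check of the (A & C1) ^ (B & C2) case for one disjoint C1/C2 pair, for illustration only.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 0x0F, C2 = 0xF0;      // C1 & C2 == 0
      for (uint32_t A = 0; A < 256; ++A)
        for (uint32_t B = 0; B < 256; ++B) {
          uint32_t X = A & C1, Y = B & C2;      // disjoint set-bit positions
          assert((X ^ Y) == (X | Y));           // (A&C1)^(B&C2) == (A&C1)|(B&C2)
        }
      return 0;
    }
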
@@ -292,10 +282,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() &&
isa<ConstantInt>(I->getOperand(1)) &&
isa<ConstantInt>(LHSInst->getOperand(1)) &&
- (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) {
+ (LHSKnown.One & RHSKnown.One & DemandedMask) != 0) {
ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1));
ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1));
- APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask);
+ APInt NewMask = ~(LHSKnown.One & RHSKnown.One & DemandedMask);
Constant *AndC =
ConstantInt::get(I->getType(), NewMask & AndRHS->getValue());
@@ -309,9 +299,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- KnownZero = std::move(IKnownZero);
+ Known.Zero = std::move(IKnownZero);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOne = std::move(IKnownOne);
+ Known.One = std::move(IKnownOne);
break;
}
case Instruction::Select:
@@ -321,13 +311,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
return nullptr;
- if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnownZero, RHSKnownOne,
- Depth + 1) ||
- SimplifyDemandedBits(I, 1, DemandedMask, LHSKnownZero, LHSKnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1))
return I;
- assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
- assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?");
+ assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?");
// If the operands are constants, see if we can simplify them.
if (ShrinkDemandedConstant(I, 1, DemandedMask) ||
@@ -335,21 +323,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I;
// Only known if known in both the LHS and RHS.
- KnownOne = RHSKnownOne & LHSKnownOne;
- KnownZero = RHSKnownZero & LHSKnownZero;
+ Known.One = RHSKnown.One & LHSKnown.One;
+ Known.Zero = RHSKnown.Zero & LHSKnown.Zero;
break;
case Instruction::Trunc: {
unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
DemandedMask = DemandedMask.zext(truncBf);
- KnownZero = KnownZero.zext(truncBf);
- KnownOne = KnownOne.zext(truncBf);
- if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne,
- Depth + 1))
+ Known.Zero = Known.Zero.zext(truncBf);
+ Known.One = Known.One.zext(truncBf);
+ if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
return I;
DemandedMask = DemandedMask.trunc(BitWidth);
- KnownZero = KnownZero.trunc(BitWidth);
- KnownOne = KnownOne.trunc(BitWidth);
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ Known.Zero = Known.Zero.trunc(BitWidth);
+ Known.One = Known.One.trunc(BitWidth);
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
break;
}
case Instruction::BitCast:
@@ -369,27 +356,25 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Don't touch a vector-to-scalar bitcast.
return nullptr;
- if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
return I;
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
break;
case Instruction::ZExt: {
// Compute the bits in the result that are not present in the input.
unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
DemandedMask = DemandedMask.trunc(SrcBitWidth);
- KnownZero = KnownZero.trunc(SrcBitWidth);
- KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne,
- Depth + 1))
+ Known.Zero = Known.Zero.trunc(SrcBitWidth);
+ Known.One = Known.One.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1))
return I;
DemandedMask = DemandedMask.zext(BitWidth);
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ Known.Zero = Known.Zero.zext(BitWidth);
+ Known.One = Known.One.zext(BitWidth);
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
// The top bits are known to be zero.
- KnownZero.setBitsFrom(SrcBitWidth);
+ Known.Zero.setBitsFrom(SrcBitWidth);
break;
}
case Instruction::SExt: {
@@ -406,27 +391,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
InputDemandedBits.setBit(SrcBitWidth-1);
InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
- KnownZero = KnownZero.trunc(SrcBitWidth);
- KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I, 0, InputDemandedBits, KnownZero, KnownOne,
- Depth + 1))
+ Known.Zero = Known.Zero.trunc(SrcBitWidth);
+ Known.One = Known.One.trunc(SrcBitWidth);
+ if (SimplifyDemandedBits(I, 0, InputDemandedBits, Known, Depth + 1))
return I;
InputDemandedBits = InputDemandedBits.zext(BitWidth);
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ Known.Zero = Known.Zero.zext(BitWidth);
+ Known.One = Known.One.zext(BitWidth);
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
// If the input sign bit is known zero, or if the NewBits are not demanded
// convert this into a zero extension.
- if (KnownZero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
+ if (Known.Zero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) {
// Convert to ZExt cast
CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName());
return InsertNewInstWith(NewCast, *I);
- } else if (KnownOne[SrcBitWidth-1]) { // Input sign bit known set
- KnownOne |= NewBits;
+ } else if (Known.One[SrcBitWidth-1]) { // Input sign bit known set
+ Known.One |= NewBits;
}
break;
}
@@ -440,11 +424,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// significant bit and all those below it.
APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
- SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnownZero, LHSKnownOne,
- Depth + 1) ||
+ SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnown, Depth + 1) ||
ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
- SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnownZero, RHSKnownOne,
- Depth + 1)) {
+ SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1)) {
// Disable the nsw and nuw flags here: We can no longer guarantee that
// we won't wrap after simplification. Removing the nsw/nuw flags is
// legal here because the top bit is not demanded.
@@ -456,30 +438,28 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If we are known to be adding/subtracting zeros to every bit below
// the highest demanded bit, we just return the other side.
- if ((DemandedFromOps & RHSKnownZero) == DemandedFromOps)
+ if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
return I->getOperand(0);
// We can't do this with the LHS for subtraction.
if (I->getOpcode() == Instruction::Add &&
- (DemandedFromOps & LHSKnownZero) == DemandedFromOps)
+ DemandedFromOps.isSubsetOf(LHSKnown.Zero))
return I->getOperand(1);
}
// Otherwise just hand the add/sub off to computeKnownBits to fill in
// the known zeros and ones.
- computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
+ computeKnownBits(V, Known, Depth, CxtI);
break;
}
- case Instruction::Shl:
- if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
- {
- Value *VarX; ConstantInt *C1;
- if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
- Instruction *Shr = cast<Instruction>(I->getOperand(0));
- Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
- KnownZero, KnownOne);
- if (R)
- return R;
- }
+ case Instruction::Shl: {
+ const APInt *SA;
+ if (match(I->getOperand(1), m_APInt(SA))) {
+ const APInt *ShrAmt;
+ if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt)))) {
+ Instruction *Shr = cast<Instruction>(I->getOperand(0));
+ if (Value *R = simplifyShrShlDemandedBits(
+ Shr, *ShrAmt, I, *SA, DemandedMask, Known))
+ return R;
}
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
@@ -492,17 +472,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
else if (IOp->hasNoUnsignedWrap())
DemandedMaskIn.setHighBits(ShiftAmt);
- if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
return I;
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
- KnownZero <<= ShiftAmt;
- KnownOne <<= ShiftAmt;
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
+ Known.Zero <<= ShiftAmt;
+ Known.One <<= ShiftAmt;
// low bits known zero.
if (ShiftAmt)
- KnownZero.setLowBits(ShiftAmt);
+ Known.Zero.setLowBits(ShiftAmt);
}
break;
+ }
case Instruction::LShr: {
const APInt *SA;
if (match(I->getOperand(1), m_APInt(SA))) {
@@ -516,14 +496,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<LShrOperator>(I)->isExact())
DemandedMaskIn.setLowBits(ShiftAmt);
- if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
return I;
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
- KnownZero.lshrInPlace(ShiftAmt);
- KnownOne.lshrInPlace(ShiftAmt);
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
+ Known.Zero.lshrInPlace(ShiftAmt);
+ Known.One.lshrInPlace(ShiftAmt);
if (ShiftAmt)
- KnownZero.setHighBits(ShiftAmt); // high bits known zero.
+ Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
}
break;
}
@@ -560,15 +539,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (cast<AShrOperator>(I)->isExact())
DemandedMaskIn.setLowBits(ShiftAmt);
- if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
return I;
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now.
APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- KnownZero.lshrInPlace(ShiftAmt);
- KnownOne.lshrInPlace(ShiftAmt);
+ Known.Zero.lshrInPlace(ShiftAmt);
+ Known.One.lshrInPlace(ShiftAmt);
// Handle the sign bits.
APInt SignMask(APInt::getSignMask(BitWidth));
@@ -577,14 +555,14 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
- if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
- (HighBits & ~DemandedMask) == HighBits) {
+ if (BitWidth <= ShiftAmt || Known.Zero[BitWidth-ShiftAmt-1] ||
+ !DemandedMask.intersects(HighBits)) {
BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0),
I->getOperand(1));
LShr->setIsExact(cast<BinaryOperator>(I)->isExact());
return InsertNewInstWith(LShr, *I);
- } else if ((KnownOne & SignMask) != 0) { // New bits are known one.
- KnownOne |= HighBits;
+ } else if (Known.One.intersects(SignMask)) { // New bits are known one.
+ Known.One |= HighBits;
}
}
break;
@@ -602,25 +580,24 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt LowBits = RA - 1;
APInt Mask2 = LowBits | APInt::getSignMask(BitWidth);
- if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne,
- Depth + 1))
+ if (SimplifyDemandedBits(I, 0, Mask2, LHSKnown, Depth + 1))
return I;
// The low bits of LHS are unchanged by the srem.
- KnownZero = LHSKnownZero & LowBits;
- KnownOne = LHSKnownOne & LowBits;
+ Known.Zero = LHSKnown.Zero & LowBits;
+ Known.One = LHSKnown.One & LowBits;
// If LHS is non-negative or has all low bits zero, then the upper bits
// are all zero.
- if (LHSKnownZero.isSignBitSet() || ((LHSKnownZero & LowBits) == LowBits))
- KnownZero |= ~LowBits;
+ if (LHSKnown.Zero.isSignBitSet() || LowBits.isSubsetOf(LHSKnown.Zero))
+ Known.Zero |= ~LowBits;
// If LHS is negative and not all low bits are zero, then the upper bits
// are all one.
- if (LHSKnownOne.isSignBitSet() && ((LHSKnownOne & LowBits) != 0))
- KnownOne |= ~LowBits;
+ if (LHSKnown.One.isSignBitSet() && LowBits.intersects(LHSKnown.One))
+ Known.One |= ~LowBits;
- assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
+ assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?");
break;
}
}
@@ -628,22 +605,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero.
if (DemandedMask.isSignBitSet()) {
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
- CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
// If it's known zero, our sign bit is also zero.
- if (LHSKnownZero.isSignBitSet())
- KnownZero.setSignBit();
+ if (LHSKnown.Zero.isSignBitSet())
+ Known.Zero.setSignBit();
}
break;
case Instruction::URem: {
- APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ KnownBits Known2(BitWidth);
APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- if (SimplifyDemandedBits(I, 0, AllOnes, KnownZero2, KnownOne2, Depth + 1) ||
- SimplifyDemandedBits(I, 1, AllOnes, KnownZero2, KnownOne2, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) ||
+ SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1))
return I;
- unsigned Leaders = KnownZero2.countLeadingOnes();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ unsigned Leaders = Known2.Zero.countLeadingOnes();
+ Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
break;
}
case Instruction::Call:
@@ -707,56 +683,54 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return ConstantInt::getNullValue(VTy);
// We know that the upper bits are set to zero.
- KnownZero.setBitsFrom(ArgWidth);
+ Known.Zero.setBitsFrom(ArgWidth);
return nullptr;
}
case Intrinsic::x86_sse42_crc32_64_64:
- KnownZero.setBitsFrom(32);
+ Known.Zero.setBitsFrom(32);
return nullptr;
}
}
- computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
+ computeKnownBits(V, Known, Depth, CxtI);
break;
}
// If the client is only demanding bits that we know, return the known
// constant.
- if (DemandedMask.isSubsetOf(KnownZero|KnownOne))
- return Constant::getIntegerValue(VTy, KnownOne);
+ if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
+ return Constant::getIntegerValue(VTy, Known.One);
return nullptr;
}
-/// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne
+/// Helper routine of SimplifyDemandedUseBits. It computes Known
/// bits. It also tries to handle simplifications that can be done based on
/// DemandedMask, but without modifying the Instruction.
Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
const APInt &DemandedMask,
- APInt &KnownZero,
- APInt &KnownOne,
+ KnownBits &Known,
unsigned Depth,
Instruction *CxtI) {
unsigned BitWidth = DemandedMask.getBitWidth();
Type *ITy = I->getType();
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+ KnownBits LHSKnown(BitWidth);
+ KnownBits RHSKnown(BitWidth);
// Despite the fact that we can't simplify this instruction in all User's
- // context, we can at least compute the knownzero/knownone bits, and we can
+ // context, we can at least compute the known bits, and we can
// do simplifications that apply to *just* the one user if we know that
// this instruction has a simpler value in that context.
switch (I->getOpcode()) {
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
- CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
CxtI);
// Output known-0 are known to be clear if zero in either the LHS | RHS.
- APInt IKnownZero = RHSKnownZero | LHSKnownZero;
+ APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero;
// Output known-1 bits are only known if set in both the LHS & RHS.
- APInt IKnownOne = RHSKnownOne & LHSKnownOne;
+ APInt IKnownOne = RHSKnown.One & LHSKnown.One;
// If the client is only demanding bits that we know, return the known
// constant.
@@ -766,13 +740,13 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
// context.
- if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne))
+ if (DemandedMask.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
return I->getOperand(0);
- if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne))
+ if (DemandedMask.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
return I->getOperand(1);
- KnownZero = std::move(IKnownZero);
- KnownOne = std::move(IKnownOne);
+ Known.Zero = std::move(IKnownZero);
+ Known.One = std::move(IKnownOne);
break;
}
case Instruction::Or: {
@@ -780,15 +754,14 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
- CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
CxtI);
// Output known-0 bits are only known if clear in both the LHS & RHS.
- APInt IKnownZero = RHSKnownZero & LHSKnownZero;
+ APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero;
// Output known-1 are known to be set if set in either the LHS | RHS.
- APInt IKnownOne = RHSKnownOne | LHSKnownOne;
+ APInt IKnownOne = RHSKnown.One | LHSKnown.One;
// If the client is only demanding bits that we know, return the known
// constant.
@@ -798,30 +771,29 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
// context.
- if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero))
+ if (DemandedMask.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
return I->getOperand(0);
- if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero))
+ if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
return I->getOperand(1);
- KnownZero = std::move(IKnownZero);
- KnownOne = std::move(IKnownOne);
+ Known.Zero = std::move(IKnownZero);
+ Known.One = std::move(IKnownOne);
break;
}
case Instruction::Xor: {
// We can simplify (X^Y) -> X or Y in the user's context if we know that
// only bits from X or Y are demanded.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
- CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1,
CxtI);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
- (RHSKnownOne & LHSKnownOne);
+ APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) |
+ (RHSKnown.One & LHSKnown.One);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
- APInt IKnownOne = (RHSKnownZero & LHSKnownOne) |
- (RHSKnownOne & LHSKnownZero);
+ APInt IKnownOne = (RHSKnown.Zero & LHSKnown.One) |
+ (RHSKnown.One & LHSKnown.Zero);
// If the client is only demanding bits that we know, return the known
// constant.
@@ -830,25 +802,25 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
// If all of the demanded bits are known zero on one side, return the
// other.
- if (DemandedMask.isSubsetOf(RHSKnownZero))
+ if (DemandedMask.isSubsetOf(RHSKnown.Zero))
return I->getOperand(0);
- if (DemandedMask.isSubsetOf(LHSKnownZero))
+ if (DemandedMask.isSubsetOf(LHSKnown.Zero))
return I->getOperand(1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- KnownZero = std::move(IKnownZero);
+ Known.Zero = std::move(IKnownZero);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
- KnownOne = std::move(IKnownOne);
+ Known.One = std::move(IKnownOne);
break;
}
default:
- // Compute the KnownZero/KnownOne bits to simplify things downstream.
- computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
+ // Compute the Known bits to simplify things downstream.
+ computeKnownBits(I, Known, Depth, CxtI);
// If this user is only demanding bits that we know, return the known
// constant.
- if (DemandedMask.isSubsetOf(KnownZero|KnownOne))
- return Constant::getIntegerValue(ITy, KnownOne);
+ if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
+ return Constant::getIntegerValue(ITy, Known.One);
break;
}
@@ -874,29 +846,26 @@ Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
///
/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
/// not successful.
-Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
- Instruction *Shl,
- const APInt &DemandedMask,
- APInt &KnownZero,
- APInt &KnownOne) {
-
- const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
- const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
+Value *
+InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1,
+ Instruction *Shl, const APInt &ShlOp1,
+ const APInt &DemandedMask,
+ KnownBits &Known) {
if (!ShlOp1 || !ShrOp1)
- return nullptr; // Noop.
+ return nullptr; // No-op.
Value *VarX = Shr->getOperand(0);
Type *Ty = VarX->getType();
- unsigned BitWidth = Ty->getIntegerBitWidth();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
return nullptr; // Undef.
unsigned ShlAmt = ShlOp1.getZExtValue();
unsigned ShrAmt = ShrOp1.getZExtValue();
- KnownOne.clearAllBits();
- KnownZero.setLowBits(ShlAmt - 1);
- KnownZero &= DemandedMask;
+ Known.One.clearAllBits();
+ Known.Zero.setLowBits(ShlAmt - 1);
+ Known.Zero &= DemandedMask;
APInt BitMask1(APInt::getAllOnesValue(BitWidth));
APInt BitMask2(APInt::getAllOnesValue(BitWidth));
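
Several places in the file above fold to a constant once every demanded bit is known, returning Known.One as the value. The check and the resulting value on plain 8-bit masks, as a sketch; Known8 and foldToConstant are illustrative, not the InstCombine helpers.

    #include <cstdint>
    #include <cstdio>

    struct Known8 { uint8_t Zero; uint8_t One; };

    // If every demanded bit is either known-0 or known-1, the demanded part of
    // the value is exactly Known.One, so the expression folds to that constant.
    bool foldToConstant(Known8 K, uint8_t Demanded, uint8_t &Out) {
      if ((Demanded & ~(K.Zero | K.One)) != 0)
        return false;      // some demanded bit is still unknown
      Out = K.One;         // undemanded bits don't matter
      return true;
    }

    int main() {
      Known8 K{0xFA, 0x05};   // low nibble fully known = 0b0101, rest known 0
      uint8_t C;
      if (foldToConstant(K, /*Demanded=*/0x0F, C))
        std::printf("folds to 0x%02x\n", unsigned(C));   // 0x05
      return 0;
    }
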
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 81f2d9fa179f..4729c79ca4c3 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -60,6 +60,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -641,14 +642,6 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) {
// They do! Return "L op' R".
++NumExpand;
- // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
- if ((L == A && R == B) ||
- (Instruction::isCommutative(InnerOpcode) && L == B && R == A))
- return Op0;
- // Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL))
- return V;
- // Otherwise, create a new instruction.
C = Builder->CreateBinOp(InnerOpcode, L, R);
C->takeName(&I);
return C;
@@ -666,14 +659,6 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) {
// They do! Return "L op' R".
++NumExpand;
- // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
- if ((L == B && R == C) ||
- (Instruction::isCommutative(InnerOpcode) && L == C && R == B))
- return Op1;
- // Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(InnerOpcode, L, R, DL))
- return V;
- // Otherwise, create a new instruction.
A = Builder->CreateBinOp(InnerOpcode, L, R);
A->takeName(&I);
return A;
@@ -2196,11 +2181,10 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) {
// There might be assume intrinsics dominating this return that completely
// determine the value. If so, constant fold it.
- unsigned BitWidth = VTy->getPrimitiveSizeInBits();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(ResultOp, KnownZero, KnownOne, 0, &RI);
- if ((KnownZero|KnownOne).isAllOnesValue())
- RI.setOperand(0, Constant::getIntegerValue(VTy, KnownOne));
+ KnownBits Known(VTy->getPrimitiveSizeInBits());
+ computeKnownBits(ResultOp, Known, 0, &RI);
+ if ((Known.Zero|Known.One).isAllOnesValue())
+ RI.setOperand(0, Constant::getIntegerValue(VTy, Known.One));
return nullptr;
}
@@ -2279,10 +2263,10 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
}
unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI);
- unsigned LeadingKnownZeros = KnownZero.countLeadingOnes();
- unsigned LeadingKnownOnes = KnownOne.countLeadingOnes();
+ KnownBits Known(BitWidth);
+ computeKnownBits(Cond, Known, 0, &SI);
+ unsigned LeadingKnownZeros = Known.Zero.countLeadingOnes();
+ unsigned LeadingKnownOnes = Known.One.countLeadingOnes();
// Compute the number of leading bits we can ignore.
// TODO: A better way to determine this would use ComputeNumSignBits().
@@ -2879,11 +2863,10 @@ bool InstCombiner::run() {
Type *Ty = I->getType();
if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) {
unsigned BitWidth = Ty->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I);
- if ((KnownZero | KnownOne).isAllOnesValue()) {
- Constant *C = ConstantInt::get(Ty, KnownOne);
+ KnownBits Known(BitWidth);
+ computeKnownBits(I, Known, /*Depth*/0, I);
+ if ((Known.Zero | Known.One).isAllOnesValue()) {
+ Constant *C = ConstantInt::get(Ty, Known.One);
DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C <<
" from: " << *I << '\n');
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 036dd8d39a08..b866958e3c4b 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -265,11 +265,10 @@ static cl::opt<bool>
cl::Hidden, cl::init(false));
static cl::opt<bool>
- ClUseMachOGlobalsSection("asan-globals-live-support",
- cl::desc("Use linker features to support dead "
- "code stripping of globals "
- "(Mach-O only)"),
- cl::Hidden, cl::init(true));
+ ClUseGlobalsGC("asan-globals-live-support",
+ cl::desc("Use linker features to support dead "
+ "code stripping of globals"),
+ cl::Hidden, cl::init(true));
// Debug flags.
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
@@ -594,13 +593,15 @@ struct AddressSanitizer : public FunctionPass {
};
class AddressSanitizerModule : public ModulePass {
- public:
+public:
explicit AddressSanitizerModule(bool CompileKernel = false,
- bool Recover = false)
+ bool Recover = false,
+ bool UseGlobalsGC = true)
: ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan),
- Recover(Recover || ClRecover) {}
+ Recover(Recover || ClRecover),
+ UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC) {}
bool runOnModule(Module &M) override;
- static char ID; // Pass identification, replacement for typeid
+ static char ID; // Pass identification, replacement for typeid
StringRef getPassName() const override { return "AddressSanitizerModule"; }
private:
@@ -635,6 +636,7 @@ private:
GlobalsMetadata GlobalsMD;
bool CompileKernel;
bool Recover;
+ bool UseGlobalsGC;
Type *IntptrTy;
LLVMContext *C;
Triple TargetTriple;
@@ -913,9 +915,10 @@ INITIALIZE_PASS(
"ModulePass",
false, false)
ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel,
- bool Recover) {
+ bool Recover,
+ bool UseGlobalsGC) {
assert(!CompileKernel || Recover);
- return new AddressSanitizerModule(CompileKernel, Recover);
+ return new AddressSanitizerModule(CompileKernel, Recover, UseGlobalsGC);
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -1537,9 +1540,6 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// binary in order to allow the linker to properly dead strip. This is only
// supported on recent versions of ld64.
bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const {
- if (!ClUseMachOGlobalsSection)
- return false;
-
if (!TargetTriple.isOSBinFormatMachO())
return false;
@@ -1911,9 +1911,9 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
Initializers[i] = Initializer;
}
- if (TargetTriple.isOSBinFormatCOFF()) {
+ if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) {
InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers);
- } else if (ShouldUseMachOGlobalsSection()) {
+ } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) {
InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers);
} else {
InstrumentGlobalsWithMetadataArray(IRB, M, NewGlobals, Initializers);

diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 61d627673c90..d7eb857cff7e 100644
--- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -943,14 +943,16 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
BasicBlock *BB = MI->getParent();
DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
DEBUG(dbgs() << *BB << "\n");
+ auto OrigBBFreq = BFI.getBlockFreq(BB);
BasicBlock *DefaultBB = SplitBlock(BB, MI);
BasicBlock::iterator It(*MI);
++It;
assert(It != DefaultBB->end());
BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It));
- DefaultBB->setName("MemOP.Default");
MergeBB->setName("MemOP.Merge");
+ BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
+ DefaultBB->setName("MemOP.Default");
auto &Ctx = Func.getContext();
IRBuilder<> IRB(BB);
diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp
index a5afc8ad977c..c1bbc4e96b16 100644
--- a/lib/Transforms/ObjCARC/PtrState.cpp
+++ b/lib/Transforms/ObjCARC/PtrState.cpp
@@ -351,8 +351,10 @@ bool TopDownPtrState::HandlePotentialAlterRefCount(Instruction *Inst,
const Value *Ptr,
ProvenanceAnalysis &PA,
ARCInstKind Class) {
- // Check for possible releases.
- if (!CanAlterRefCount(Inst, Ptr, PA, Class))
+ // Check for possible releases. Treat clang.arc.use as a releasing instruction
+ // to prevent sinking a retain past it.
+ if (!CanAlterRefCount(Inst, Ptr, PA, Class) &&
+ Class != ARCInstKind::IntrinsicUser)
return false;
DEBUG(dbgs() << " CanAlterRefCount: Seq: " << GetSeq() << "; " << *Ptr
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index ee6333e88716..f62e111460ca 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -53,6 +53,12 @@ using namespace consthoist;
STATISTIC(NumConstantsHoisted, "Number of constants hoisted");
STATISTIC(NumConstantsRebased, "Number of constants rebased");
+static cl::opt<bool> ConstHoistWithBlockFrequency(
+ "consthoist-with-block-frequency", cl::init(false), cl::Hidden,
+ cl::desc("Enable the use of the block frequency analysis to reduce the "
+ "chance to execute const materialization more frequently than "
+ "without hoisting."));
+
namespace {
/// \brief The constant hoisting pass.
class ConstantHoistingLegacyPass : public FunctionPass {
@@ -68,6 +74,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ if (ConstHoistWithBlockFrequency)
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
@@ -82,6 +90,7 @@ private:
char ConstantHoistingLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist",
"Constant Hoisting", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist",
@@ -99,9 +108,13 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) {
DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n");
DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n');
- bool MadeChange = Impl.runImpl(
- Fn, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn),
- getAnalysis<DominatorTreeWrapperPass>().getDomTree(), Fn.getEntryBlock());
+ bool MadeChange =
+ Impl.runImpl(Fn, getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn),
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ ConstHoistWithBlockFrequency
+ ? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI()
+ : nullptr,
+ Fn.getEntryBlock());
if (MadeChange) {
DEBUG(dbgs() << "********** Function after Constant Hoisting: "
@@ -148,33 +161,142 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
return IDom->getBlock()->getTerminator();
}
+/// \brief Given \p BBs as input, find another set of BBs which collectively
+/// dominates \p BBs and has the minimal sum of frequencies. Return the BB
+/// set found in \p BBs.
+void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
+ BasicBlock *Entry,
+ SmallPtrSet<BasicBlock *, 8> &BBs) {
+ assert(!BBs.count(Entry) && "Assume Entry is not in BBs");
+ // Nodes on the current path to the root.
+ SmallPtrSet<BasicBlock *, 8> Path;
+ // Candidates includes any block 'BB' in set 'BBs' that is not strictly
+ // dominated by any other blocks in set 'BBs', and all nodes in the path
+ // in the dominator tree from Entry to 'BB'.
+ SmallPtrSet<BasicBlock *, 16> Candidates;
+ for (auto BB : BBs) {
+ Path.clear();
+ // Walk up the dominator tree until Entry or another BB in BBs
+ // is reached. Insert the nodes on the way to the Path.
+ BasicBlock *Node = BB;
+ // The "Path" is a candidate path to be added into Candidates set.
+ bool isCandidate = false;
+ do {
+ Path.insert(Node);
+ if (Node == Entry || Candidates.count(Node)) {
+ isCandidate = true;
+ break;
+ }
+ assert(DT.getNode(Node)->getIDom() &&
+ "Entry doens't dominate current Node");
+ Node = DT.getNode(Node)->getIDom()->getBlock();
+ } while (!BBs.count(Node));
+
+ // If isCandidate is false, Node is another Block in BBs dominating
+ // current 'BB'. Drop the nodes on the Path.
+ if (!isCandidate)
+ continue;
+
+ // Add nodes on the Path into Candidates.
+ Candidates.insert(Path.begin(), Path.end());
+ }
+
+ // Sort the nodes in Candidates in top-down order and save the nodes
+ // in Orders.
+ unsigned Idx = 0;
+ SmallVector<BasicBlock *, 16> Orders;
+ Orders.push_back(Entry);
+ while (Idx != Orders.size()) {
+ BasicBlock *Node = Orders[Idx++];
+ for (auto ChildDomNode : DT.getNode(Node)->getChildren()) {
+ if (Candidates.count(ChildDomNode->getBlock()))
+ Orders.push_back(ChildDomNode->getBlock());
+ }
+ }
+
+ // Visit Orders in bottom-up order.
+ typedef std::pair<SmallPtrSet<BasicBlock *, 16>, BlockFrequency>
+ InsertPtsCostPair;
+ // InsertPtsMap is a map from a BB to the best insertion points for the
+ // subtree of BB (subtree not including the BB itself).
+ DenseMap<BasicBlock *, InsertPtsCostPair> InsertPtsMap;
+ InsertPtsMap.reserve(Orders.size() + 1);
+ for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) {
+ BasicBlock *Node = *RIt;
+ bool NodeInBBs = BBs.count(Node);
+ SmallPtrSet<BasicBlock *, 16> &InsertPts = InsertPtsMap[Node].first;
+ BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second;
+
+ // Return the optimal insert points in BBs.
+ if (Node == Entry) {
+ BBs.clear();
+ if (InsertPtsFreq > BFI.getBlockFreq(Node))
+ BBs.insert(Entry);
+ else
+ BBs.insert(InsertPts.begin(), InsertPts.end());
+ break;
+ }
+
+ BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock();
+ // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child
+ // will update its parent's ParentInsertPts and ParentPtsFreq.
+ SmallPtrSet<BasicBlock *, 16> &ParentInsertPts = InsertPtsMap[Parent].first;
+ BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second;
+ // Choose to insert in Node or in subtree of Node.
+ if (InsertPtsFreq > BFI.getBlockFreq(Node) || NodeInBBs) {
+ ParentInsertPts.insert(Node);
+ ParentPtsFreq += BFI.getBlockFreq(Node);
+ } else {
+ ParentInsertPts.insert(InsertPts.begin(), InsertPts.end());
+ ParentPtsFreq += InsertPtsFreq;
+ }
+ }
+}
+
/// \brief Find an insertion point that dominates all uses.
-Instruction *ConstantHoistingPass::findConstantInsertionPoint(
+SmallPtrSet<Instruction *, 8> ConstantHoistingPass::findConstantInsertionPoint(
const ConstantInfo &ConstInfo) const {
assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");
// Collect all basic blocks.
SmallPtrSet<BasicBlock *, 8> BBs;
+ SmallPtrSet<Instruction *, 8> InsertPts;
for (auto const &RCI : ConstInfo.RebasedConstants)
for (auto const &U : RCI.Uses)
BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());
- if (BBs.count(Entry))
- return &Entry->front();
+ if (BBs.count(Entry)) {
+ InsertPts.insert(&Entry->front());
+ return InsertPts;
+ }
+
+ if (BFI) {
+ findBestInsertionSet(*DT, *BFI, Entry, BBs);
+ for (auto BB : BBs) {
+ BasicBlock::iterator InsertPt = BB->begin();
+ for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
+ ;
+ InsertPts.insert(&*InsertPt);
+ }
+ return InsertPts;
+ }
while (BBs.size() >= 2) {
BasicBlock *BB, *BB1, *BB2;
BB1 = *BBs.begin();
BB2 = *std::next(BBs.begin());
BB = DT->findNearestCommonDominator(BB1, BB2);
- if (BB == Entry)
- return &Entry->front();
+ if (BB == Entry) {
+ InsertPts.insert(&Entry->front());
+ return InsertPts;
+ }
BBs.erase(BB1);
BBs.erase(BB2);
BBs.insert(BB);
}
assert((BBs.size() == 1) && "Expected only one element.");
Instruction &FirstInst = (*BBs.begin())->front();
- return findMatInsertPt(&FirstInst);
+ InsertPts.insert(findMatInsertPt(&FirstInst));
+ return InsertPts;
}
@@ -557,29 +679,54 @@ bool ConstantHoistingPass::emitBaseConstants() {
bool MadeChange = false;
for (auto const &ConstInfo : ConstantVec) {
// Hoist and hide the base constant behind a bitcast.
- Instruction *IP = findConstantInsertionPoint(ConstInfo);
- IntegerType *Ty = ConstInfo.BaseConstant->getType();
- Instruction *Base =
- new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP);
- DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant << ") to BB "
- << IP->getParent()->getName() << '\n' << *Base << '\n');
- NumConstantsHoisted++;
+ SmallPtrSet<Instruction *, 8> IPSet = findConstantInsertionPoint(ConstInfo);
+ assert(!IPSet.empty() && "IPSet is empty");
+
+ unsigned UsesNum = 0;
+ unsigned ReBasesNum = 0;
+ for (Instruction *IP : IPSet) {
+ IntegerType *Ty = ConstInfo.BaseConstant->getType();
+ Instruction *Base =
+ new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP);
+ DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant
+ << ") to BB " << IP->getParent()->getName() << '\n'
+ << *Base << '\n');
+
+ // Emit materialization code for all rebased constants.
+ unsigned Uses = 0;
+ for (auto const &RCI : ConstInfo.RebasedConstants) {
+ for (auto const &U : RCI.Uses) {
+ Uses++;
+ BasicBlock *OrigMatInsertBB =
+ findMatInsertPt(U.Inst, U.OpndIdx)->getParent();
+ // If Base constant is to be inserted in multiple places,
+ // generate rebase for U using the Base dominating U.
+ if (IPSet.size() == 1 ||
+ DT->dominates(Base->getParent(), OrigMatInsertBB)) {
+ emitBaseConstants(Base, RCI.Offset, U);
+ ReBasesNum++;
+ }
+ }
+ }
+ UsesNum = Uses;
- // Emit materialization code for all rebased constants.
- for (auto const &RCI : ConstInfo.RebasedConstants) {
- NumConstantsRebased++;
- for (auto const &U : RCI.Uses)
- emitBaseConstants(Base, RCI.Offset, U);
+ // Use the same debug location as the last user of the constant.
+ assert(!Base->use_empty() && "The use list is empty!?");
+ assert(isa<Instruction>(Base->user_back()) &&
+ "All uses should be instructions.");
+ Base->setDebugLoc(cast<Instruction>(Base->user_back())->getDebugLoc());
}
+ (void)UsesNum;
+ (void)ReBasesNum;
+ // Expect all uses are rebased after rebase is done.
+ assert(UsesNum == ReBasesNum && "Not all uses are rebased");
+
+ NumConstantsHoisted++;
- // Use the same debug location as the last user of the constant.
- assert(!Base->use_empty() && "The use list is empty!?");
- assert(isa<Instruction>(Base->user_back()) &&
- "All uses should be instructions.");
- Base->setDebugLoc(cast<Instruction>(Base->user_back())->getDebugLoc());
+ // Base constant is also included in ConstInfo.RebasedConstants, so
+ // deduct 1 from ConstInfo.RebasedConstants.size().
+ NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1;
- // Correct for base constant, which we counted above too.
- NumConstantsRebased--;
MadeChange = true;
}
return MadeChange;
@@ -595,9 +742,11 @@ void ConstantHoistingPass::deleteDeadCastInst() const {
/// \brief Optimize expensive integer constants in the given function.
bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
- DominatorTree &DT, BasicBlock &Entry) {
+ DominatorTree &DT, BlockFrequencyInfo *BFI,
+ BasicBlock &Entry) {
this->TTI = &TTI;
this->DT = &DT;
+ this->BFI = BFI;
this->Entry = &Entry;
// Collect all constant candidates.
collectConstantCandidates(Fn);
@@ -628,7 +777,10 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- if (!runImpl(F, TTI, DT, F.getEntryBlock()))
+ auto BFI = ConstHoistWithBlockFrequency
+ ? &AM.getResult<BlockFrequencyAnalysis>(F)
+ : nullptr;
+ if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock()))
return PreservedAnalyses::all();
PreservedAnalyses PA;
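
findBestInsertionSet above makes one decision per dominator-tree edge while walking bottom-up: keep the subtree's chosen insertion points, or collapse them into the current node, whichever has the lower summed block frequency (and always collapse when the node itself uses the constant). That single step, reduced to stand-alone types for illustration; SubtreeChoice and liftToParent are invented names, frequencies are plain integers rather than BlockFrequency.

    #include <cstdint>
    #include <cstdio>
    #include <set>
    #include <string>

    struct SubtreeChoice {
      std::set<std::string> InsertPts; // best insertion blocks found so far
      uint64_t Freq = 0;               // their summed frequency
    };

    // When moving from a node to its parent: if placing one materialization in
    // the node is no more expensive than keeping the subtree's insertion
    // points (or the node itself needs the constant), collapse to the node.
    void liftToParent(const std::string &Node, uint64_t NodeFreq,
                      bool NodeNeedsIt, const SubtreeChoice &Subtree,
                      SubtreeChoice &Parent) {
      if (Subtree.Freq > NodeFreq || NodeNeedsIt) {
        Parent.InsertPts.insert(Node);
        Parent.Freq += NodeFreq;
      } else {
        Parent.InsertPts.insert(Subtree.InsertPts.begin(),
                                Subtree.InsertPts.end());
        Parent.Freq += Subtree.Freq;
      }
    }

    int main() {
      SubtreeChoice Subtree;           // two cold uses under a hot diamond header
      Subtree.InsertPts = {"left", "right"};
      Subtree.Freq = 10 + 10;
      SubtreeChoice Parent;
      liftToParent("header", /*NodeFreq=*/100, /*NodeNeedsIt=*/false,
                   Subtree, Parent);
      // Keeps the two cold blocks (cost 20) rather than the hot header (cost 100).
      std::printf("chose %zu insert point(s), cost %llu\n",
                  Parent.InsertPts.size(), (unsigned long long)Parent.Freq);
      return 0;
    }
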
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index c843c61ea94e..b5a4cc2f3953 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
@@ -26,6 +26,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -95,7 +96,8 @@ static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
return true;
}
-static bool processPHI(PHINode *P, LazyValueInfo *LVI) {
+static bool processPHI(PHINode *P, LazyValueInfo *LVI,
+ const SimplifyQuery &SQ) {
bool Changed = false;
BasicBlock *BB = P->getParent();
@@ -149,9 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI) {
Changed = true;
}
- // FIXME: Provide TLI, DT, AT to SimplifyInstruction.
- const DataLayout &DL = BB->getModule()->getDataLayout();
- if (Value *V = SimplifyInstruction(P, DL)) {
+ if (Value *V = SimplifyInstruction(P, SQ.getWithInstruction(P))) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
Changed = true;
@@ -488,9 +488,8 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
ConstantInt::getFalse(C->getContext());
}
-static bool runImpl(Function &F, LazyValueInfo *LVI) {
+static bool runImpl(Function &F, LazyValueInfo *LVI, const SimplifyQuery &SQ) {
bool FnChanged = false;
-
// Visiting in a pre-order depth-first traversal causes us to simplify early
// blocks before querying later blocks (which require us to analyze early
// blocks). Eagerly simplifying shallow blocks means there is strictly less
@@ -505,7 +504,7 @@ static bool runImpl(Function &F, LazyValueInfo *LVI) {
BBChanged |= processSelect(cast<SelectInst>(II), LVI);
break;
case Instruction::PHI:
- BBChanged |= processPHI(cast<PHINode>(II), LVI);
+ BBChanged |= processPHI(cast<PHINode>(II), LVI, SQ);
break;
case Instruction::ICmp:
case Instruction::FCmp:
@@ -566,14 +565,25 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
return false;
LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
- return runImpl(F, LVI);
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
+ auto *ACWP = getAnalysisIfAvailable<AssumptionCacheTracker>();
+ auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr;
+ const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC);
+ return runImpl(F, LVI, SQ);
}
PreservedAnalyses
CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
- bool Changed = runImpl(F, LVI);
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F);
+ auto *AC = AM.getCachedResult<AssumptionAnalysis>(F);
+ const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC);
+ bool Changed = runImpl(F, LVI, SQ);
if (!Changed)
return PreservedAnalyses::all();
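Both entry points of CorrelatedValuePropagation above gather whatever analyses happen to be around (DominatorTree, TargetLibraryInfo, AssumptionCache), any of which may be null, and bundle them with the DataLayout into one SimplifyQuery that is threaded through runImpl. A rough standalone sketch of that "bundle optional analyses once, pass a single handle around" shape, with hypothetical types standing in for the LLVM classes:

    #include <iostream>

    // Hypothetical stand-ins for analysis results.
    struct DomTree { bool dominates() const { return true; } };
    struct LibInfo { bool hasMemset() const { return true; } };

    // Analogue of SimplifyQuery: one immutable handle whose members may be null.
    struct Query {
      const DomTree *DT = nullptr;   // optional
      const LibInfo *TLI = nullptr;  // optional
    };

    static bool simplify(const Query &Q) {
      // Consumers must tolerate missing analyses instead of requiring them.
      bool Changed = false;
      if (Q.DT && Q.DT->dominates())
        Changed = true;
      if (Q.TLI && Q.TLI->hasMemset())
        Changed = true;
      return Changed;
    }

    int main() {
      DomTree DT;
      Query Q;        // TLI deliberately left null, as with a missing cached result
      Q.DT = &DT;
      std::cout << (simplify(Q) ? "changed\n" : "no change\n");
      return 0;
    }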
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index 7019287954a1..48eda09c463e 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -51,6 +51,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -537,9 +538,9 @@ bool GuardWideningImpl::parseRangeChecks(
} else if (match(Check.getBase(),
m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
unsigned BitWidth = OpLHS->getType()->getScalarSizeInBits();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(OpLHS, KnownZero, KnownOne, DL);
- if ((OpRHS->getValue() & KnownZero) == OpRHS->getValue()) {
+ KnownBits Known(BitWidth);
+ computeKnownBits(OpLHS, Known, DL);
+ if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
Check.setBase(OpLHS);
APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
Check.setOffset(ConstantInt::get(Ctx, NewOffset));
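The rewritten GuardWidening check treats base = OpLHS | C as OpLHS + C whenever every set bit of C falls on a bit known to be zero in OpLHS, so the or can never produce carries. A small self-contained model of that test over 64-bit masks (KnownZero below is a plain bitmask standing in for KnownBits::Zero; the helper name is invented):

    #include <cassert>
    #include <cstdint>

    // True if or-ing C into a value whose known-zero bits are KnownZero is
    // guaranteed to equal adding C: every bit of C lands on a known 0.
    static bool orActsLikeAdd(std::uint64_t C, std::uint64_t KnownZero) {
      return (C & KnownZero) == C;
    }

    int main() {
      // Value known to be a multiple of 8: the low three bits are known zero.
      std::uint64_t KnownZero = 0x7;
      assert(orActsLikeAdd(0x3, KnownZero));  // 0x3 only touches known-zero bits
      assert(!orActsLikeAdd(0x9, KnownZero)); // bit 3 is not known zero
      return 0;
    }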
diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 5d8701431a2c..9e2563879da2 100644
--- a/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -152,15 +152,15 @@ private:
Function *F) const;
void appendsFlatAddressExpressionToPostorderStack(
- Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
- DenseSet<Value *> *Visited) const;
+ Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
+ DenseSet<Value *> &Visited) const;
bool rewriteIntrinsicOperands(IntrinsicInst *II,
Value *OldV, Value *NewV) const;
void collectRewritableIntrinsicOperands(
IntrinsicInst *II,
- std::vector<std::pair<Value *, bool>> *PostorderStack,
- DenseSet<Value *> *Visited) const;
+ std::vector<std::pair<Value *, bool>> &PostorderStack,
+ DenseSet<Value *> &Visited) const;
std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
@@ -204,7 +204,6 @@ static bool isAddressExpression(const Value &V) {
//
// Precondition: V is an address expression.
static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
- assert(isAddressExpression(V));
const Operator &Op = cast<Operator>(V);
switch (Op.getOpcode()) {
case Instruction::PHI: {
@@ -254,8 +253,8 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
// TODO: Move logic to TTI?
void InferAddressSpaces::collectRewritableIntrinsicOperands(
- IntrinsicInst *II, std::vector<std::pair<Value *, bool>> *PostorderStack,
- DenseSet<Value *> *Visited) const {
+ IntrinsicInst *II, std::vector<std::pair<Value *, bool>> &PostorderStack,
+ DenseSet<Value *> &Visited) const {
switch (II->getIntrinsicID()) {
case Intrinsic::objectsize:
case Intrinsic::amdgcn_atomic_inc:
@@ -272,13 +271,13 @@ void InferAddressSpaces::collectRewritableIntrinsicOperands(
// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
- Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
- DenseSet<Value *> *Visited) const {
+ Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
+ DenseSet<Value *> &Visited) const {
assert(V->getType()->isPointerTy());
if (isAddressExpression(*V) &&
V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
- if (Visited->insert(V).second)
- PostorderStack->push_back(std::make_pair(V, false));
+ if (Visited.insert(V).second)
+ PostorderStack.push_back(std::make_pair(V, false));
}
}
@@ -293,14 +292,18 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
DenseSet<Value *> Visited;
auto PushPtrOperand = [&](Value *Ptr) {
- appendsFlatAddressExpressionToPostorderStack(Ptr, &PostorderStack,
- &Visited);
+ appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack,
+ Visited);
};
- // We only explore address expressions that are reachable from loads and
- // stores for now because we aim at generating faster loads and stores.
+ // Look at operations that may be interesting to accelerate by moving to a
+ // known address space. We aim at generating faster loads and stores, but
+ // pure addressing calculations may also be faster.
for (Instruction &I : instructions(F)) {
- if (auto *LI = dyn_cast<LoadInst>(&I))
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ if (!GEP->getType()->isVectorTy())
+ PushPtrOperand(GEP->getPointerOperand());
+ } else if (auto *LI = dyn_cast<LoadInst>(&I))
PushPtrOperand(LI->getPointerOperand());
else if (auto *SI = dyn_cast<StoreInst>(&I))
PushPtrOperand(SI->getPointerOperand());
@@ -316,7 +319,7 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
if (auto *MTI = dyn_cast<MemTransferInst>(MI))
PushPtrOperand(MTI->getRawSource());
} else if (auto *II = dyn_cast<IntrinsicInst>(&I))
- collectRewritableIntrinsicOperands(II, &PostorderStack, &Visited);
+ collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
// FIXME: Handle vectors of pointers
if (Cmp->getOperand(0)->getType()->isPointerTy()) {
@@ -338,8 +341,8 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
// Otherwise, adds its operands to the stack and explores them.
PostorderStack.back().second = true;
for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
- appendsFlatAddressExpressionToPostorderStack(PtrOperand, &PostorderStack,
- &Visited);
+ appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
+ Visited);
}
}
return Postorder;
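collectFlatAddressExpressions builds its post-order with an explicit stack of (value, operands-pushed) pairs instead of recursion: the first time a node is popped it is flagged and its unvisited operands are pushed; the second time it is emitted, so operands always precede their users. A generic sketch of that traversal over a small adjacency list (hypothetical graph type, not the pass's data structures):

    #include <cstdio>
    #include <unordered_set>
    #include <utility>
    #include <vector>

    using Graph = std::vector<std::vector<int>>;  // node -> operand nodes

    static std::vector<int> postorder(const Graph &G, int Root) {
      std::vector<int> Order;
      std::vector<std::pair<int, bool>> Stack;  // (node, operands already pushed)
      std::unordered_set<int> Visited;
      Stack.push_back({Root, false});
      Visited.insert(Root);
      while (!Stack.empty()) {
        int Node = Stack.back().first;
        if (Stack.back().second) {        // operands done: emit the node
          Order.push_back(Node);
          Stack.pop_back();
          continue;
        }
        Stack.back().second = true;       // flag, then descend into operands
        for (int Op : G[Node])
          if (Visited.insert(Op).second)
            Stack.push_back({Op, false});
      }
      return Order;
    }

    int main() {
      Graph G = {{1, 2}, {2}, {}};        // 0 uses 1 and 2; 1 uses 2
      for (int N : postorder(G, 0))
        std::printf("%d ", N);            // prints "2 1 0": operands first
      std::printf("\n");
      return 0;
    }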
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 08eb95a1a3d3..a0da81605a80 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1289,6 +1289,36 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
if (PredToDestList.empty())
return false;
+ // If all the predecessors go to a single known successor, we want to fold,
+ // not thread. By doing so, we do not need to duplicate the current block, and
+ // we do not miss potential opportunities in case we don't/can't duplicate.
+ if (OnlyDest && OnlyDest != MultipleDestSentinel) {
+ if (PredToDestList.size() ==
+ (size_t)std::distance(pred_begin(BB), pred_end(BB))) {
+ bool SeenFirstBranchToOnlyDest = false;
+ for (BasicBlock *SuccBB : successors(BB)) {
+ if (SuccBB == OnlyDest && !SeenFirstBranchToOnlyDest)
+ SeenFirstBranchToOnlyDest = true; // Don't modify the first branch.
+ else
+ SuccBB->removePredecessor(BB, true); // This successor is unreachable.
+ }
+
+ // Finally update the terminator.
+ TerminatorInst *Term = BB->getTerminator();
+ BranchInst::Create(OnlyDest, Term);
+ Term->eraseFromParent();
+
+ // If the condition is now dead due to the removal of the old terminator,
+ // erase it.
+ auto *CondInst = dyn_cast<Instruction>(Cond);
+ if (CondInst && CondInst->use_empty())
+ CondInst->eraseFromParent();
+ // FIXME: If this instruction is defined in the current BB and it resolves
+ // to a single value from all predecessors, we can do RAUW.
+ return true;
+ }
+ }
+
// Determine which is the most common successor. If we have many inputs and
// this block is a switch, we want to start by threading the batch that goes
// to the most popular destination first. If we only know about one
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 946d85d7360f..5042fc18d7c4 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -345,6 +345,11 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
if (!SI->isSimple())
return false;
+ // Don't convert stores of non-integral pointer types to memsets (which store
+ // integers).
+ if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType()))
+ return false;
+
// Avoid merging nontemporal stores.
if (SI->getMetadata(LLVMContext::MD_nontemporal))
return false;
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index e5689368de80..8ce96cf1b7a6 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -58,13 +58,14 @@ class LoopRotate {
AssumptionCache *AC;
DominatorTree *DT;
ScalarEvolution *SE;
+ const SimplifyQuery &SQ;
public:
LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
- DominatorTree *DT, ScalarEvolution *SE)
- : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE) {
- }
+ DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ)
+ : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
+ SQ(SQ) {}
bool processLoop(Loop *L);
private:
@@ -311,8 +312,6 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
-
// For the rest of the instructions, either hoist to the OrigPreheader if
// possible or create a clone in the OldPreHeader if not.
TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
@@ -342,8 +341,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// With the operands remapped, see if the instruction constant folds or is
// otherwise simplifiable. This commonly occurs because the entry from PHI
// nodes allows icmps and other instructions to fold.
- // FIXME: Provide TLI, DT, AC to SimplifyInstruction.
- Value *V = SimplifyInstruction(C, DL);
+ Value *V = SimplifyInstruction(C, SQ.getWithInstruction(C));
if (V && LI->replacementPreservesLCSSAForm(C, V)) {
// If so, then delete the temporary instruction and stick the folded value
// in the map.
@@ -671,7 +669,9 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0;
- LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE);
+ const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
+ const SimplifyQuery SQ(DL, &AR.TLI, &AR.DT, &AR.AC);
+ LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ);
bool Changed = LR.processLoop(&L);
if (!Changed)
@@ -714,7 +714,11 @@ public:
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;
- LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE);
+ auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr;
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ const SimplifyQuery SQ(DL, TLI, DT, AC);
+ LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ);
return LR.processLoop(L);
}
};
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index a99c9999c619..8fa806a7e8bc 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This pass transforms loops that contain branches on loop-invariant conditions
-// to have multiple loops. For example, it turns the left into the right code:
+// to multiple loops. For example, it turns the left into the right code:
//
// for (...) if (lic)
// A for (...)
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 659353e912fe..49ce0262c97b 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -352,20 +352,10 @@ Value *StructurizeCFG::invert(Value *Condition) {
if (Instruction *Inst = dyn_cast<Instruction>(Condition)) {
// Third: Check all the users for an invert
BasicBlock *Parent = Inst->getParent();
- for (User *U : Condition->users()) {
- if (Instruction *I = dyn_cast<Instruction>(U)) {
+ for (User *U : Condition->users())
+ if (Instruction *I = dyn_cast<Instruction>(U))
if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
return I;
- }
- }
-
- // Avoid creating a new instruction in the common case of a compare.
- if (CmpInst *Cmp = dyn_cast<CmpInst>(Inst)) {
- if (Cmp->hasOneUse()) {
- Cmp->setPredicate(Cmp->getInversePredicate());
- return Cmp;
- }
- }
// Last option: Create a new instruction
return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 1cfe3bd53648..7ffdad597a9b 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -256,14 +257,14 @@ ValueRange FastDivInsertionTask::getValueRange(Value *V,
unsigned HiBits = LongLen - ShortLen;
const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
- APInt Zeros(LongLen, 0), Ones(LongLen, 0);
+ KnownBits Known(LongLen);
- computeKnownBits(V, Zeros, Ones, DL);
+ computeKnownBits(V, Known, DL);
- if (Zeros.countLeadingOnes() >= HiBits)
+ if (Known.Zero.countLeadingOnes() >= HiBits)
return VALRNG_KNOWN_SHORT;
- if (Ones.countLeadingZeros() < HiBits)
+ if (Known.One.countLeadingZeros() < HiBits)
return VALRNG_LIKELY_LONG;
// Long integer divisions are often used in hashtable implementations. It's
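getValueRange above asks KnownBits whether the upper HiBits of the operand are provably zero (the value definitely fits the short type) or whether a one is known somewhere in those bits (the long path is definitely needed). The same classification over plain 64-bit masks, as an illustrative sketch that assumes a 64-bit long type and invented names:

    #include <cstdint>
    #include <cstdio>

    enum Range { KnownShort, LikelyLong, Unknown };

    // KnownZero/KnownOne are masks of bits proven 0 / proven 1 in the value.
    static Range classify(std::uint64_t KnownZero, std::uint64_t KnownOne,
                          unsigned ShortLen) {
      std::uint64_t HiMask = ~0ULL << ShortLen;  // bits at or above ShortLen
      if ((KnownZero & HiMask) == HiMask)
        return KnownShort;  // all high bits proven zero: fits the short type
      if (KnownOne & HiMask)
        return LikelyLong;  // some high bit proven one: needs the long path
      return Unknown;
    }

    int main() {
      // A 64-bit value known to use at most 16 bits.
      std::printf("%d\n", classify(0xFFFFFFFFFFFF0000ULL, 0x0, 32)); // KnownShort
      // A 64-bit value with bit 40 proven set.
      std::printf("%d\n", classify(0x0, 1ULL << 40, 32));            // LikelyLong
      return 0;
    }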
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 82552684b832..ed72099ec3ed 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -73,24 +73,26 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) {
}
/// \brief Build a set of blocks to extract if the input blocks are viable.
-template <typename IteratorT>
-static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
- IteratorT BBEnd) {
+static SetVector<BasicBlock *>
+buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT) {
+ assert(!BBs.empty() && "The set of blocks to extract must be non-empty");
SetVector<BasicBlock *> Result;
- assert(BBBegin != BBEnd);
-
// Loop over the blocks, adding them to our set-vector, and aborting with an
// empty set if we encounter invalid blocks.
- do {
- if (!Result.insert(*BBBegin))
- llvm_unreachable("Repeated basic blocks in extraction input");
+ for (BasicBlock *BB : BBs) {
- if (!CodeExtractor::isBlockValidForExtraction(**BBBegin)) {
+ // If this block is dead, don't process it.
+ if (DT && !DT->isReachableFromEntry(BB))
+ continue;
+
+ if (!Result.insert(BB))
+ llvm_unreachable("Repeated basic blocks in extraction input");
+ if (!CodeExtractor::isBlockValidForExtraction(*BB)) {
Result.clear();
return Result;
}
- } while (++BBBegin != BBEnd);
+ }
#ifndef NDEBUG
for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()),
@@ -106,23 +108,17 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
return Result;
}
-/// \brief Helper to call buildExtractionBlockSet with an ArrayRef.
-static SetVector<BasicBlock *>
-buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs) {
- return buildExtractionBlockSet(BBs.begin(), BBs.end());
-}
-
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
bool AggregateArgs, BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {}
+ BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {}
CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
BlockFrequencyInfo *BFI,
BranchProbabilityInfo *BPI)
: DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
- BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())),
+ BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)),
NumExitBlocks(~0U) {}
/// definedInRegion - Return true if the specified value is defined in the
@@ -194,9 +190,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// containing PHI nodes merging values from outside of the region, and a
// second that contains all of the code for the block and merges back any
// incoming values from inside of the region.
- BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator();
- BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs,
- Header->getName()+".ce");
+ BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT);
// We only want to code extract the second block now, and it becomes the new
// header of the region.
@@ -205,11 +199,6 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
Blocks.insert(NewBB);
Header = NewBB;
- // Okay, update dominator sets. The blocks that dominate the new one are the
- // blocks that dominate TIBB plus the new block itself.
- if (DT)
- DT->splitBlock(NewBB);
-
// Okay, now we need to adjust the PHI nodes and any branches from within the
// region to go to the new header block instead of the old header block.
if (NumPredsFromRegion) {
@@ -224,12 +213,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {
// Okay, everything within the region is now branching to the right block, we
// just have to update the PHI nodes now, inserting PHI nodes into NewBB.
+ BasicBlock::iterator AfterPHIs;
for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) {
PHINode *PN = cast<PHINode>(AfterPHIs);
// Create a new PHI node in the new region, which has an incoming value
// from OldPred of PN.
PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion,
PN->getName() + ".ce", &NewBB->front());
+ PN->replaceAllUsesWith(NewPN);
NewPN->addIncoming(PN, OldPred);
// Loop over all of the incoming values in PN, moving them to NewPN if they
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 8c5442762643..d3002c5fb750 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -1038,9 +1039,9 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
"getOrEnforceKnownAlignment expects a pointer!");
unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType());
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- computeKnownBits(V, KnownZero, KnownOne, DL, 0, AC, CxtI, DT);
- unsigned TrailZ = KnownZero.countTrailingOnes();
+ KnownBits Known(BitWidth);
+ computeKnownBits(V, Known, DL, 0, AC, CxtI, DT);
+ unsigned TrailZ = Known.Zero.countTrailingOnes();
// Avoid trouble with ridiculously large TrailZ values, such as
// those computed from a null pointer.
@@ -1268,21 +1269,37 @@ static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) {
}
}
-/// Prepend \p DIExpr with a deref and offset operation.
+enum { WithStackValue = true };
+
+/// Prepend \p DIExpr with a deref and offset operation and optionally turn it
+/// into a stack value.
static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr,
- bool Deref, int64_t Offset) {
- if (!Deref && !Offset)
+ bool Deref, int64_t Offset = 0,
+ bool StackValue = false) {
+ if (!Deref && !Offset && !StackValue)
return DIExpr;
- // Create a copy of the original DIDescriptor for user variable, prepending
- // "deref" operation to a list of address elements, as new llvm.dbg.declare
- // will take a value storing address of the memory for variable, not
- // alloca itself.
- SmallVector<uint64_t, 4> Ops;
+
+ SmallVector<uint64_t, 8> Ops;
+ appendOffset(Ops, Offset);
if (Deref)
Ops.push_back(dwarf::DW_OP_deref);
- appendOffset(Ops, Offset);
if (DIExpr)
- Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ for (auto Op : DIExpr->expr_ops()) {
+ // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment.
+ if (StackValue) {
+ if (Op.getOp() == dwarf::DW_OP_stack_value)
+ StackValue = false;
+ else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
+ Ops.push_back(dwarf::DW_OP_stack_value);
+ StackValue = false;
+ }
+ }
+ Ops.push_back(Op.getOp());
+ for (unsigned I = 0; I < Op.getNumArgs(); ++I)
+ Ops.push_back(Op.getArg(I));
+ }
+ if (StackValue)
+ Ops.push_back(dwarf::DW_OP_stack_value);
return Builder.createExpression(Ops);
}
@@ -1374,12 +1391,15 @@ void llvm::salvageDebugInfo(Instruction &I) {
unsigned BitWidth =
M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace());
APInt Offset(BitWidth, 0);
- // Rewrite a constant GEP into a DIExpression.
+ // Rewrite a constant GEP into a DIExpression. Since we are performing
+ // arithmetic to compute the variable's *value* in the DIExpression, we
+ // need to mark the expression with a DW_OP_stack_value.
if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
auto *DIExpr = DVI->getExpression();
DIBuilder DIB(M, /*AllowUnresolved*/ false);
- // GEP offsets are i32 and thus alwaus fit into an int64_t.
- DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue());
+ // GEP offsets are i32 and thus always fit into an int64_t.
+ DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue(),
+ WithStackValue);
DVI->setOperand(0, MDWrap(I.getOperand(0)));
DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
@@ -1391,7 +1411,7 @@ void llvm::salvageDebugInfo(Instruction &I) {
// Rewrite the load into DW_OP_deref.
auto *DIExpr = DVI->getExpression();
DIBuilder DIB(M, /*AllowUnresolved*/ false);
- DIExpr = prependDIExpr(DIB, DIExpr, WithDeref, 0);
+ DIExpr = prependDIExpr(DIB, DIExpr, WithDeref);
DVI->setOperand(0, MDWrap(I.getOperand(0)));
DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
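prependDIExpr above copies the old expression operands while deciding where DW_OP_stack_value belongs: reuse one that is already present, otherwise slot it in just before a DW_OP_LLVM_fragment, and only append it at the very end when neither appears. A simplified standalone sketch of that ordering rule over plain opcode integers (the constants below are placeholders for illustration, not the real DWARF encodings, and multi-argument operators are ignored):

    #include <cstdint>
    #include <vector>

    // Placeholder opcode values, for illustration only.
    constexpr std::uint64_t OP_DEREF = 1;
    constexpr std::uint64_t OP_STACK_VALUE = 2;
    constexpr std::uint64_t OP_FRAGMENT = 3;

    // Prepend the Prefix ops and, if requested, mark the result as a stack
    // value while keeping any fragment operator last.
    static std::vector<std::uint64_t>
    prepend(const std::vector<std::uint64_t> &Old,
            const std::vector<std::uint64_t> &Prefix, bool StackValue) {
      std::vector<std::uint64_t> Ops(Prefix);
      for (std::uint64_t Op : Old) {
        if (StackValue) {
          if (Op == OP_STACK_VALUE)
            StackValue = false;             // already present: keep as-is
          else if (Op == OP_FRAGMENT) {
            Ops.push_back(OP_STACK_VALUE);  // must precede the fragment op
            StackValue = false;
          }
        }
        Ops.push_back(Op);
      }
      if (StackValue)
        Ops.push_back(OP_STACK_VALUE);      // no fragment seen: append at the end
      return Ops;
    }

    int main() {
      std::vector<std::uint64_t> Old = {OP_FRAGMENT};
      std::vector<std::uint64_t> New = prepend(Old, {OP_DEREF}, true);
      // New is {OP_DEREF, OP_STACK_VALUE, OP_FRAGMENT}.
      return New.size() == 3 ? 0 : 1;
    }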
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 3c669ce644e2..43ab725b0769 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -318,6 +318,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
return false;
}
+ // The current loop unroll pass can only unroll loops with a single latch
+ // that's a conditional branch exiting the loop.
+ // FIXME: The implementation can be extended to work with more complicated
+ // cases, e.g. loops with multiple latches.
BasicBlock *Header = L->getHeader();
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
@@ -328,6 +332,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
return false;
}
+ auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
+ return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
+ };
+
+ if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
+ DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
+ " exiting the loop can be unrolled\n");
+ return false;
+ }
+
if (Header->hasAddressTaken()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
DEBUG(dbgs() <<
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index b375d51005d5..8959e77438e9 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -403,6 +403,14 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
Value *Val = SI->getCondition(); // The value we are switching on...
BasicBlock* Default = SI->getDefaultDest();
+ // Don't handle unreachable blocks. If there are successors with phis, this
+ // would leave them behind with missing predecessors.
+ if ((CurBlock != &F->getEntryBlock() && pred_empty(CurBlock)) ||
+ CurBlock->getSinglePredecessor() == CurBlock) {
+ DeleteList.insert(CurBlock);
+ return;
+ }
+
// If there is only the default destination, just branch.
if (!SI->getNumCases()) {
BranchInst::Create(Default, CurBlock);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 2f575b9d5027..f86e97b6cc72 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -60,6 +60,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -3055,6 +3056,15 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
BasicBlock *QFB = QBI->getSuccessor(1);
BasicBlock *PostBB = QFB->getSingleSuccessor();
+ // Make sure we have a good guess for PostBB. If QTB's only successor is
+ // QFB, then QFB is a better PostBB.
+ if (QTB->getSingleSuccessor() == QFB)
+ PostBB = QFB;
+
+ // If we couldn't find a good PostBB, stop.
+ if (!PostBB)
+ return false;
+
bool InvertPCond = false, InvertQCond = false;
// Canonicalize fallthroughs to the true branches.
if (PFB == QBI->getParent()) {
@@ -3079,8 +3089,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) {
auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
};
- if (!PostBB ||
- !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
+ if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
!HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
return false;
if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
@@ -3746,7 +3755,7 @@ bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
if (!isa<DbgInfoIntrinsic>(I))
return false;
- SmallSet<BasicBlock *, 4> TrivialUnwindBlocks;
+ SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
auto *PhiLPInst = cast<PHINode>(RI->getValue());
// Check incoming blocks to see if any of them are trivial.
@@ -4359,8 +4368,8 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
const DataLayout &DL) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
- APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
- computeKnownBits(Cond, KnownZero, KnownOne, DL, 0, AC, SI);
+ KnownBits Known(Bits);
+ computeKnownBits(Cond, Known, DL, 0, AC, SI);
// We can also eliminate cases by determining that their values are outside of
// the limited range of the condition based on how many significant (non-sign)
@@ -4372,7 +4381,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
SmallVector<ConstantInt *, 8> DeadCases;
for (auto &Case : SI->cases()) {
APInt CaseVal = Case.getCaseValue()->getValue();
- if ((CaseVal & KnownZero) != 0 || (CaseVal & KnownOne) != KnownOne ||
+ if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
(CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
DeadCases.push_back(Case.getCaseValue());
DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n");
@@ -4386,7 +4395,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
bool HasDefault =
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
const unsigned NumUnknownBits =
- Bits - (KnownZero | KnownOne).countPopulation();
+ Bits - (Known.Zero | Known.One).countPopulation();
assert(NumUnknownBits <= Bits);
if (HasDefault && DeadCases.empty() &&
NumUnknownBits < 64 /* avoid overflow */ &&
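EliminateDeadSwitchCases now phrases the dead-case test in KnownBits terms: a case label is unreachable if it has a one where the condition is known zero, or a zero where the condition is known one. An equivalent check over 64-bit masks, as a hedged standalone sketch with invented names:

    #include <cassert>
    #include <cstdint>

    // KnownZero/KnownOne describe the switch condition; CaseVal is one label.
    static bool caseIsDead(std::uint64_t CaseVal, std::uint64_t KnownZero,
                           std::uint64_t KnownOne) {
      bool HitsKnownZero = (CaseVal & KnownZero) != 0;        // Known.Zero.intersects(CaseVal)
      bool MissesKnownOne = (CaseVal & KnownOne) != KnownOne; // !Known.One.isSubsetOf(CaseVal)
      return HitsKnownZero || MissesKnownOne;
    }

    int main() {
      // Condition known to look like 0b1..0: bit 0 is zero, bit 3 is one.
      std::uint64_t KnownZero = 0x1, KnownOne = 0x8;
      assert(caseIsDead(0x3, KnownZero, KnownOne));  // sets a known-zero bit
      assert(caseIsDead(0x4, KnownZero, KnownOne));  // misses the known-one bit
      assert(!caseIsDead(0xA, KnownZero, KnownOne)); // 0b1010 is still possible
      return 0;
    }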
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index f6070868de44..27373427d4f7 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -35,10 +35,8 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of redundant instructions removed");
-static bool runImpl(Function &F, const DominatorTree *DT,
- const TargetLibraryInfo *TLI, AssumptionCache *AC,
+static bool runImpl(Function &F, const SimplifyQuery &SQ,
OptimizationRemarkEmitter *ORE) {
- const DataLayout &DL = F.getParent()->getDataLayout();
SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -56,7 +54,8 @@ static bool runImpl(Function &F, const DominatorTree *DT,
// Don't waste time simplifying unused instructions.
if (!I->use_empty()) {
- if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC, ORE)) {
+ if (Value *V =
+ SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I->users())
Next->insert(cast<Instruction>(U));
@@ -65,7 +64,7 @@ static bool runImpl(Function &F, const DominatorTree *DT,
Changed = true;
}
}
- if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) {
+ if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) {
// RecursivelyDeleteTriviallyDeadInstruction can remove more than one
// instruction, so simply incrementing the iterator does not work.
// When instructions get deleted re-iterate instead.
@@ -113,8 +112,9 @@ namespace {
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
OptimizationRemarkEmitter *ORE =
&getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-
- return runImpl(F, DT, TLI, AC, ORE);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const SimplifyQuery SQ(DL, TLI, DT, AC);
+ return runImpl(F, SQ, ORE);
}
};
}
@@ -141,7 +141,9 @@ PreservedAnalyses InstSimplifierPass::run(Function &F,
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- bool Changed = runImpl(F, &DT, &TLI, &AC, &ORE);
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const SimplifyQuery SQ(DL, &TLI, &DT, &AC);
+ bool Changed = runImpl(F, SQ, &ORE);
if (!Changed)
return PreservedAnalyses::all();
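runImpl above keeps two sets and ping-pongs between them: whatever changed in this round (in the pass, the users of each simplified instruction) is queued for the next one, so the function is re-examined only where progress is still possible. A toy standalone sketch of that two-set worklist shape, with halving even integers standing in for SimplifyInstruction and all names invented:

    #include <cstddef>
    #include <cstdio>
    #include <set>
    #include <utility>
    #include <vector>

    // Toy rewrite: halve non-zero even values; odd values are already "simple".
    static bool simplifyOnce(int &V) {
      if (V != 0 && V % 2 == 0) { V /= 2; return true; }
      return false;
    }

    int main() {
      std::vector<int> Values{8, 5, 12};
      std::set<std::size_t> S1, S2, *ToSimplify = &S1, *Next = &S2;
      for (std::size_t I = 0; I < Values.size(); ++I)
        ToSimplify->insert(I);             // first round: look at everything
      while (!ToSimplify->empty()) {
        for (std::size_t I : *ToSimplify)
          if (simplifyOnce(Values[I]))
            Next->insert(I);               // changed: revisit next round
        ToSimplify->clear();
        std::swap(ToSimplify, Next);       // ping-pong the two sets
      }
      for (int V : Values)
        std::printf("%d ", V);             // prints "1 5 3"
      std::printf("\n");
      return 0;
    }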
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index aa71e3669ea2..2c1c30463a23 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -37,10 +38,6 @@ using namespace llvm;
using namespace PatternMatch;
static cl::opt<bool>
- ColdErrorCalls("error-reporting-is-cold", cl::init(true), cl::Hidden,
- cl::desc("Treat error-reporting calls as cold"));
-
-static cl::opt<bool>
EnableUnsafeFPShrink("enable-double-float-shrink", cl::Hidden,
cl::init(false),
cl::desc("Enable unsafe double to float "
@@ -459,11 +456,9 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
Value *Offset = GEP->getOperand(2);
unsigned BitWidth = Offset->getType()->getIntegerBitWidth();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI,
- nullptr);
- KnownZero.flipAllBits();
+ KnownBits Known(BitWidth);
+ computeKnownBits(Offset, Known, DL, 0, nullptr, CI, nullptr);
+ Known.Zero.flipAllBits();
size_t ArrSize =
cast<ArrayType>(GEP->getSourceElementType())->getNumElements();
@@ -477,7 +472,7 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {
// optimize if we can prove that the program has undefined behavior when
// Offset is outside that range. That is the case when GEP->getOperand(0)
// is a pointer to an object whose memory extent is NullTermIdx+1.
- if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) ||
+ if ((Known.Zero.isNonNegative() && Known.Zero.ule(NullTermIdx)) ||
(GEP->isInBounds() && isa<GlobalVariable>(GEP->getOperand(0)) &&
NullTermIdx == ArrSize - 1))
return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx),
@@ -846,6 +841,9 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
// Is the inner call really malloc()?
Function *InnerCallee = Malloc->getCalledFunction();
+ if (!InnerCallee)
+ return nullptr;
+
LibFunc Func;
if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
Func != LibFunc_malloc)
@@ -930,6 +928,24 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
if (V == nullptr)
return nullptr;
+ // If the call isn't an intrinsic, check that it isn't within a function with
+ // the same name as the float version of this call.
+ //
+ // e.g. inline float expf(float val) { return (float) exp((double) val); }
+ //
+ // A similar definition exists in the MinGW-w64 math.h header file which,
+ // when compiled with -O2 -ffast-math, causes the generation of infinite
+ // loops where expf is called.
+ if (!Callee->isIntrinsic()) {
+ const Function *F = CI->getFunction();
+ StringRef FName = F->getName();
+ StringRef CalleeName = Callee->getName();
+ if ((FName.size() == (CalleeName.size() + 1)) &&
+ (FName.back() == 'f') &&
+ FName.startswith(CalleeName))
+ return nullptr;
+ }
+
// Propagate fast-math flags from the existing call to the new call.
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
@@ -1632,7 +1648,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
}
static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
- if (!ColdErrorCalls || !Callee || !Callee->isDeclaration())
+ if (!Callee || !Callee->isDeclaration())
return false;
if (StreamArg < 0)
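The new guard in optimizeUnaryDoubleFP compares names: if the calling function is literally the float wrapper of the callee (caller name equals callee name plus a trailing 'f', e.g. expf wrapping exp), shrinking the double call back to the float routine would make the wrapper call itself forever. A tiny standalone version of that name test (hypothetical helper performing the same comparison as the hunk):

    #include <cassert>
    #include <string>

    // True if Caller looks like the float wrapper of Callee, e.g. "expf"/"exp".
    static bool callerIsFloatWrapper(const std::string &Caller,
                                     const std::string &Callee) {
      return Caller.size() == Callee.size() + 1 && Caller.back() == 'f' &&
             Caller.compare(0, Callee.size(), Callee) == 0;
    }

    int main() {
      assert(callerIsFloatWrapper("expf", "exp"));   // would recurse: bail out
      assert(!callerIsFloatWrapper("myexp", "exp")); // unrelated caller: fine
      return 0;
    }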
diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 4409d7a404f8..97dcb40a1d72 100644
--- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -30,6 +30,7 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
@@ -65,7 +66,9 @@ public:
bool run();
private:
- Value *getPointerOperand(Value *I);
+ Value *getPointerOperand(Value *I) const;
+
+ GetElementPtrInst *getSourceGEP(Value *Src) const;
unsigned getPointerAddressSpace(Value *I);
@@ -215,7 +218,7 @@ bool Vectorizer::run() {
return Changed;
}
-Value *Vectorizer::getPointerOperand(Value *I) {
+Value *Vectorizer::getPointerOperand(Value *I) const {
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return LI->getPointerOperand();
if (StoreInst *SI = dyn_cast<StoreInst>(I))
@@ -231,6 +234,19 @@ unsigned Vectorizer::getPointerAddressSpace(Value *I) {
return -1;
}
+GetElementPtrInst *Vectorizer::getSourceGEP(Value *Src) const {
+ // First strip pointer bitcasts. Make sure the pointee size is the same
+ // with and without casts.
+ // TODO: a stride set by the add instruction below can match the difference
+ // in pointee type size here. Currently it will not be vectorized.
+ Value *SrcPtr = getPointerOperand(Src);
+ Value *SrcBase = SrcPtr->stripPointerCasts();
+ if (DL.getTypeStoreSize(SrcPtr->getType()->getPointerElementType()) ==
+ DL.getTypeStoreSize(SrcBase->getType()->getPointerElementType()))
+ SrcPtr = SrcBase;
+ return dyn_cast<GetElementPtrInst>(SrcPtr);
+}
+
// FIXME: Merge with llvm::isConsecutiveAccess
bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
Value *PtrA = getPointerOperand(A);
@@ -283,8 +299,8 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
// Look through GEPs after checking they're the same except for the last
// index.
- GetElementPtrInst *GEPA = dyn_cast<GetElementPtrInst>(getPointerOperand(A));
- GetElementPtrInst *GEPB = dyn_cast<GetElementPtrInst>(getPointerOperand(B));
+ GetElementPtrInst *GEPA = getSourceGEP(A);
+ GetElementPtrInst *GEPB = getSourceGEP(B);
if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands())
return false;
unsigned FinalIndex = GEPA->getNumOperands() - 1;
@@ -328,11 +344,9 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
// If any bits are known to be zero other than the sign bit in OpA, we can
// add 1 to it while guaranteeing no overflow of any sort.
if (!Safe) {
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(OpA, KnownZero, KnownOne, DL, 0, nullptr, OpA, &DT);
- KnownZero &= ~APInt::getHighBitsSet(BitWidth, 1);
- if (KnownZero != 0)
+ KnownBits Known(BitWidth);
+ computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT);
+ if (Known.Zero.countTrailingZeros() < (BitWidth - 1))
Safe = true;
}
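The rewritten overflow check asks whether some bit below the sign bit of OpA is known to be zero: the carry of a +1 stops at the lowest zero bit, so if such a bit exists the increment can never ripple into the sign bit, and Known.Zero.countTrailingZeros() < BitWidth - 1 amounts to "there is a known-zero bit among positions 0 .. BitWidth - 2". The same test on a 64-bit mask, as an illustrative sketch:

    #include <cassert>
    #include <cstdint>

    // Position of the lowest set bit, or 64 if Mask is zero (mirrors calling
    // countTrailingZeros on the known-zero mask).
    static unsigned lowestSetBit(std::uint64_t Mask) {
      if (Mask == 0)
        return 64;
      unsigned N = 0;
      while ((Mask & 1) == 0) { Mask >>= 1; ++N; }
      return N;
    }

    // Safe to add 1 without overflow if some bit below the sign bit (bit 63
    // here) is known to be zero: the carry chain stops at or before it.
    static bool safeToIncrement(std::uint64_t KnownZero) {
      return lowestSetBit(KnownZero) < 63;
    }

    int main() {
      assert(safeToIncrement(0x8));          // bit 3 known zero
      assert(!safeToIncrement(0x0));         // nothing known about the value
      assert(!safeToIncrement(1ULL << 63));  // only the sign bit is known zero
      return 0;
    }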
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7eb8fabe0b2f..87ce0194dad6 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3586,8 +3586,12 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
IRBuilder<> B(MiddleBlock->getTerminator());
Value *CountMinusOne = B.CreateSub(
CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1));
- Value *CMO = B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType(),
- "cast.cmo");
+ Value *CMO =
+ !II.getStep()->getType()->isIntegerTy()
+ ? B.CreateCast(Instruction::SIToFP, CountMinusOne,
+ II.getStep()->getType())
+ : B.CreateSExtOrTrunc(CountMinusOne, II.getStep()->getType());
+ CMO->setName("cast.cmo");
Value *Escape = II.transform(B, CMO, PSE.getSE(), DL);
Escape->setName("ind.escape");
MissingVals[UI] = Escape;
@@ -4516,14 +4520,15 @@ void InnerLoopVectorizer::predicateInstructions() {
for (auto KV : PredicatedInstructions) {
BasicBlock::iterator I(KV.first);
BasicBlock *Head = I->getParent();
- auto *BB = SplitBlock(Head, &*std::next(I), DT, LI);
auto *T = SplitBlockAndInsertIfThen(KV.second, &*I, /*Unreachable=*/false,
/*BranchWeights=*/nullptr, DT, LI);
I->moveBefore(T);
sinkScalarOperands(&*I);
- I->getParent()->setName(Twine("pred.") + I->getOpcodeName() + ".if");
- BB->setName(Twine("pred.") + I->getOpcodeName() + ".continue");
+ BasicBlock *PredicatedBlock = I->getParent();
+ Twine BBNamePrefix = Twine("pred.") + I->getOpcodeName();
+ PredicatedBlock->setName(BBNamePrefix + ".if");
+ PredicatedBlock->getSingleSuccessor()->setName(BBNamePrefix + ".continue");
// If the instruction is non-void create a Phi node at reconvergence point.
if (!I->getType()->isVoidTy()) {
@@ -7324,8 +7329,16 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
VectorTy, VF - 1, VectorTy);
- // TODO: IF-converted IFs become selects.
- return 0;
+ // Phi nodes in non-header blocks (not inductions, reductions, etc.) are
+ // converted into select instructions. We require N - 1 selects per phi
+ // node, where N is the number of incoming values.
+ if (VF > 1 && Phi->getParent() != TheLoop->getHeader())
+ return (Phi->getNumIncomingValues() - 1) *
+ TTI.getCmpSelInstrCost(
+ Instruction::Select, ToVectorTy(Phi->getType(), VF),
+ ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF));
+
+ return TTI.getCFInstrCost(Instruction::PHI);
}
case Instruction::UDiv:
case Instruction::SDiv: