Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp                      30
-rw-r--r--  lib/Analysis/ValueTracking.cpp                         4
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp      16
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp             11
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp                            30
-rw-r--r--  lib/DebugInfo/DWARF/DWARFContext.cpp                   4
-rw-r--r--  lib/DebugInfo/DWARF/DWARFVerifier.cpp                  8
-rw-r--r--  lib/Object/COFFImportFile.cpp                          6
-rw-r--r--  lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp      19
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp               38
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td                        10
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp                    44
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td                      55
-rw-r--r--  lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp        23
-rw-r--r--  lib/Transforms/Instrumentation/DataFlowSanitizer.cpp  10
-rw-r--r--  lib/Transforms/Scalar/BDCE.cpp                        44
16 files changed, 275 insertions(+), 77 deletions(-)
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index b973203a89b66..9539fd7c75596 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -162,6 +162,11 @@ static cl::opt<unsigned>
cl::desc("Maximum depth of recursive SExt/ZExt"),
cl::init(8));
+static cl::opt<unsigned>
+ MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden,
+ cl::desc("Max coefficients in AddRec during evolving"),
+ cl::init(16));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -2878,6 +2883,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
continue;
+ // Limit max number of arguments to avoid creation of unreasonably big
+ // SCEVAddRecs with very complex operands.
+ if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 >
+ MaxAddRecSize)
+ continue;
+
bool Overflow = false;
Type *Ty = AddRec->getType();
bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
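Folding {A_0,+,...,+,A_(m-1)} * {B_0,+,...,+,B_(n-1)} over the same loop produces an AddRec with m + n - 1 coefficients, which is exactly the quantity the guard above compares against the limit; with the default of 16, two 10-operand AddRecs, for example, are left unfolded.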
@@ -7582,6 +7593,25 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
if (const SCEVConstant *BTCC =
dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+
+ // This trivial case can show up in some degenerate cases where
+ // the incoming IR has not yet been fully simplified.
+ if (BTCC->getValue()->isZero()) {
+ Value *InitValue = nullptr;
+ bool MultipleInitValues = false;
+ for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+ if (!LI->contains(PN->getIncomingBlock(i))) {
+ if (!InitValue)
+ InitValue = PN->getIncomingValue(i);
+ else if (InitValue != PN->getIncomingValue(i)) {
+ MultipleInitValues = true;
+ break;
+ }
+            }
+          }
+          if (!MultipleInitValues && InitValue)
+            return getSCEV(InitValue);
+        }
// Okay, we know how many times the containing loop executes. If
// this is a constant evolving PHI node, get the final value at
// the specified iteration number.
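A minimal illustration of the degenerate case handled above, assuming a hypothetical input (not from the patch): when the backedge is never taken, the backedge-taken count is the constant 0, and the header PHI folds to its loop-invariant initial value.

    // C++ sketch of source that yields such IR before full simplification.
    int onceThrough(int init) {
      int x = init;
      for (int i = 0; i != 1; ++i) // backedge-taken count is 0
        x ^= 1;                    // the body still runs exactly once
      return x;                    // the header PHI for x is just 'init'
    }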
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 439b21a812587..cdfe74d158c95 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -4458,6 +4458,10 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
+ // Bail out when we hit the limit.
+ if (Depth == MaxDepth)
+ return None;
+
// A mismatch occurs when we compare a scalar cmp to a vector cmp, for example.
if (LHS->getType() != RHS->getType())
return None;
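The new check follows ValueTracking's usual depth-limited recursion pattern; a minimal standalone sketch of the idea (hypothetical helper, not the LLVM API; ValueTracking's MaxDepth is 6):

    #include <optional>

    constexpr unsigned MaxDepth = 6;

    // Peel one layer per call and give up ("unknown") once the budget is
    // spent, so pathological condition chains cannot recurse unboundedly.
    std::optional<bool> impliedRec(unsigned Depth) {
      if (Depth == MaxDepth)
        return std::nullopt; // bail out; the caller must be conservative
      return impliedRec(Depth + 1);
    }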
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 0cad20db09648..ecb54e1e4b41c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -302,7 +302,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
- SDValue Cond = GetScalarizedVector(N->getOperand(0));
+ SDValue Cond = N->getOperand(0);
+ EVT OpVT = Cond.getValueType();
+ SDLoc DL(N);
+  // The vselect result and true/false operands need scalarizing, but it's
+  // not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
+  // See the similar logic in ScalarizeVecRes_VSETCC.
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Cond = GetScalarizedVector(Cond);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ Cond = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
SDValue LHS = GetScalarizedVector(N->getOperand(1));
TargetLowering::BooleanContent ScalarBool =
TLI.getBooleanContents(false, false);
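This pairs with the X86 changes later in this diff: with AVX512, v1i1 is a legal type, so the condition is not itself scalarized, and the v1i1 = extract_subvector form it can produce is handled by the new case in LowerEXTRACT_SUBVECTOR below.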
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 823e77850c4ba..0ff154784f685 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7262,22 +7262,23 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
AddDbgValue(I, ToNode, false);
}
-void SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
- SDValue NewMemOp) {
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+ SDValue NewMemOp) {
assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
- if (!OldLoad->hasAnyUseOfValue(1))
- return;
-
// The new memory operation must have the same position as the old load in
// terms of memory dependency. Create a TokenFactor for the old load and new
// memory operation and update uses of the old load's output chain to use that
// TokenFactor.
SDValue OldChain = SDValue(OldLoad, 1);
SDValue NewChain = SDValue(NewMemOp.getNode(), 1);
+ if (!OldLoad->hasAnyUseOfValue(1))
+ return NewChain;
+
SDValue TokenFactor =
getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain);
ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain);
+ return TokenFactor;
}
//===----------------------------------------------------------------------===//
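Returning the chain lets callers thread memory ordering through directly; the X86 combineStore hunks later in this diff use it as, roughly:

    // Take the chain from makeEquivalentMemoryOrdering rather than from the
    // new load itself, so the TokenFactor (when one is created) is what the
    // rest of the DAG sees.
    SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd);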
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 124c2790f68c4..f8aacdb8649d4 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -180,6 +180,7 @@ class VirtRegRewriter : public MachineFunctionPass {
void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
void handleIdentityCopy(MachineInstr &MI) const;
void expandCopyBundle(MachineInstr &MI) const;
+ bool subRegLiveThrough(const MachineInstr &MI, unsigned SuperPhysReg) const;
public:
static char ID;
@@ -415,6 +416,32 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
}
}
+/// Check whether (part of) \p SuperPhysReg is live through \p MI.
+/// \pre \p MI defines a subregister of a virtual register that
+/// has been assigned to \p SuperPhysReg.
+bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
+ unsigned SuperPhysReg) const {
+ SlotIndex MIIndex = LIS->getInstructionIndex(MI);
+ SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
+ SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex();
+ for (MCRegUnitIterator Unit(SuperPhysReg, TRI); Unit.isValid(); ++Unit) {
+ const LiveRange &UnitRange = LIS->getRegUnit(*Unit);
+ // If the regunit is live both before and after MI,
+ // we assume it is live through.
+ // Generally speaking, this is not true, because something like
+ // "RU = op RU" would match that description.
+ // However, we know that we are trying to assess whether
+ // a def of a virtual reg, vreg, is live at the same time as RU.
+ // If we are in the "RU = op RU" situation, that means that vreg
+ // is defined at the same time as RU (i.e., "vreg, RU = op RU").
+ // Thus, vreg and RU interfere and vreg cannot be assigned to
+ // SuperPhysReg. Therefore, this situation cannot happen.
+ if (UnitRange.liveAt(AfterMIDefs) && UnitRange.liveAt(BeforeMIUses))
+ return true;
+ }
+ return false;
+}
+
void VirtRegRewriter::rewrite() {
bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
SmallVector<unsigned, 8> SuperDeads;
@@ -452,7 +479,8 @@ void VirtRegRewriter::rewrite() {
// A virtual register kill refers to the whole register, so we may
// have to add <imp-use,kill> operands for the super-register. A
// partial redef always kills and redefines the super-register.
- if (MO.readsReg() && (MO.isDef() || MO.isKill()))
+ if ((MO.readsReg() && (MO.isDef() || MO.isKill())) ||
+ (MO.isDef() && subRegLiveThrough(*MI, PhysReg)))
SuperKills.push_back(PhysReg);
if (MO.isDef()) {
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 495e09fbae355..dd3235244e243 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -134,13 +134,13 @@ dumpDWARFv5StringOffsetsSection(raw_ostream &OS, StringRef SectionName,
uint64_t StringOffset =
StrOffsetExt.getRelocatedValue(EntrySize, &Offset);
if (Format == DWARF32) {
- OS << format("%8.8x ", StringOffset);
uint32_t StringOffset32 = (uint32_t)StringOffset;
+ OS << format("%8.8x ", StringOffset32);
const char *S = StrData.getCStr(&StringOffset32);
if (S)
OS << format("\"%s\"", S);
} else
- OS << format("%16.16x ", StringOffset);
+ OS << format("%16.16" PRIx64 " ", StringOffset);
OS << "\n";
}
}
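The underlying bug is a varargs mismatch: StringOffset is 64 bits wide, but "%8.8x" and "%16.16x" read an unsigned int from the argument list, which is undefined behavior. A self-contained illustration of the corrected idiom, using plain printf rather than LLVM's format():

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t StringOffset = 0x1122334455667788ULL;
      uint32_t StringOffset32 = (uint32_t)StringOffset;
      std::printf("%16.16" PRIx64 "\n", StringOffset);  // DWARF64: full value
      std::printf("%8.8" PRIx32 "\n", StringOffset32);  // DWARF32: low 32 bits
      return 0;
    }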
diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 6cf44ffa3796a..4de46bea301e9 100644
--- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -196,7 +196,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
++NumErrors;
OS << "error: DW_AT_stmt_list offset is beyond .debug_line "
"bounds: "
- << format("0x%08" PRIx32, *SectionOffset) << "\n";
+ << format("0x%08" PRIx64, *SectionOffset) << "\n";
Die.dump(OS, 0);
OS << "\n";
}
@@ -234,7 +234,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
if (CUOffset >= CUSize) {
++NumErrors;
OS << "error: " << FormEncodingString(Form) << " CU offset "
- << format("0x%08" PRIx32, CUOffset)
+ << format("0x%08" PRIx64, CUOffset)
<< " is invalid (must be less than CU size of "
<< format("0x%08" PRIx32, CUSize) << "):\n";
Die.dump(OS, 0);
@@ -366,7 +366,7 @@ void DWARFVerifier::verifyDebugLineRows() {
if (Row.Address < PrevAddress) {
++NumDebugLineErrors;
OS << "error: .debug_line["
- << format("0x%08" PRIx32,
+ << format("0x%08" PRIx64,
*toSectionOffset(Die.find(DW_AT_stmt_list)))
<< "] row[" << RowIndex
<< "] decreases in address from previous row:\n";
@@ -381,7 +381,7 @@ void DWARFVerifier::verifyDebugLineRows() {
if (Row.File > MaxFileIndex) {
++NumDebugLineErrors;
OS << "error: .debug_line["
- << format("0x%08" PRIx32,
+ << format("0x%08" PRIx64,
*toSectionOffset(Die.find(DW_AT_stmt_list)))
<< "][" << RowIndex << "] has invalid file index " << Row.File
<< " (valid values are [1," << MaxFileIndex << "]):\n";
diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp
index a515bc8ad16de..ff039463d08c8 100644
--- a/lib/Object/COFFImportFile.cpp
+++ b/lib/Object/COFFImportFile.cpp
@@ -557,7 +557,7 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym,
std::error_code writeImportLibrary(StringRef ImportName, StringRef Path,
ArrayRef<COFFShortExport> Exports,
- MachineTypes Machine) {
+ MachineTypes Machine, bool MakeWeakAliases) {
std::vector<NewArchiveMember> Members;
ObjectFactory OF(llvm::sys::path::filename(ImportName), Machine);
@@ -575,7 +575,7 @@ std::error_code writeImportLibrary(StringRef ImportName, StringRef Path,
if (E.Private)
continue;
- if (E.isWeak()) {
+ if (E.isWeak() && MakeWeakAliases) {
Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, false));
Members.push_back(OF.createWeakExternal(E.Name, E.ExtName, true));
continue;
@@ -587,7 +587,7 @@ std::error_code writeImportLibrary(StringRef ImportName, StringRef Path,
if (E.Constant)
ImportType = IMPORT_CONST;
- StringRef SymbolName = E.isWeak() ? E.ExtName : E.Name;
+ StringRef SymbolName = E.SymbolName.empty() ? E.Name : E.SymbolName;
ImportNameType NameType = getNameType(SymbolName, E.Name, Machine);
Expected<std::string> Name = E.ExtName.empty()
? SymbolName
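The new MakeWeakAliases parameter lets a caller skip emitting the pair of weak-external archive members for weak exports; the llvm-dlltool driver, updated later in this diff, passes true to keep its existing output.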
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 005f2d51e4036..9a7f45bde6c99 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -388,6 +388,10 @@ static unsigned isMatchingStore(MachineInstr &LoadInst,
}
static unsigned getPreIndexedOpcode(unsigned Opc) {
+ // FIXME: We don't currently support creating pre-indexed loads/stores when
+ // the load or store is the unscaled version. If we decide to perform such an
+ // optimization in the future the cases for the unscaled loads/stores will
+ // need to be added here.
switch (Opc) {
default:
llvm_unreachable("Opcode has no pre-indexed equivalent!");
@@ -451,32 +455,42 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
default:
llvm_unreachable("Opcode has no post-indexed wise equivalent!");
case AArch64::STRSui:
+ case AArch64::STURSi:
return AArch64::STRSpost;
case AArch64::STRDui:
+ case AArch64::STURDi:
return AArch64::STRDpost;
case AArch64::STRQui:
+ case AArch64::STURQi:
return AArch64::STRQpost;
case AArch64::STRBBui:
return AArch64::STRBBpost;
case AArch64::STRHHui:
return AArch64::STRHHpost;
case AArch64::STRWui:
+ case AArch64::STURWi:
return AArch64::STRWpost;
case AArch64::STRXui:
+ case AArch64::STURXi:
return AArch64::STRXpost;
case AArch64::LDRSui:
+ case AArch64::LDURSi:
return AArch64::LDRSpost;
case AArch64::LDRDui:
+ case AArch64::LDURDi:
return AArch64::LDRDpost;
case AArch64::LDRQui:
+ case AArch64::LDURQi:
return AArch64::LDRQpost;
case AArch64::LDRBBui:
return AArch64::LDRBBpost;
case AArch64::LDRHHui:
return AArch64::LDRHHpost;
case AArch64::LDRWui:
+ case AArch64::LDURWi:
return AArch64::LDRWpost;
case AArch64::LDRXui:
+ case AArch64::LDURXi:
return AArch64::LDRXpost;
case AArch64::LDRSWui:
return AArch64::LDRSWpost;
@@ -1694,8 +1708,9 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++NumPostFolded;
break;
}
- // Don't know how to handle pre/post-index versions, so move to the next
- // instruction.
+
+ // Don't know how to handle unscaled pre/post-index versions below, so
+ // move to the next instruction.
if (TII->isUnscaledLdSt(Opc)) {
++MBBI;
break;
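In effect, an unscaled access such as stur s0, [x0] followed by add x0, x0, #4 can now be merged into the post-indexed str s0, [x0], #4, where previously only the scaled STR/LDR forms were candidates; the pre-indexed side is deliberately left alone, per the FIXME above.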
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index ec49f0d37af44..46d8f0dba6914 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -769,8 +769,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
const MachineOperand &Dest = MI.getOperand(0);
- unsigned StatusReg = MI.getOperand(1).getReg();
- bool StatusDead = MI.getOperand(1).isDead();
+ unsigned TempReg = MI.getOperand(1).getReg();
// Duplicating undef operands into 2 instructions does not guarantee the same
// value on both; however, undef should be replaced by xzr anyway.
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
@@ -797,23 +796,9 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
}
// .Lloadcmp:
- // mov wStatus, #0
// ldrex rDest, [rAddr]
// cmp rDest, rDesired
// bne .Ldone
- if (!StatusDead) {
- if (IsThumb) {
- BuildMI(LoadCmpBB, DL, TII->get(ARM::tMOVi8), StatusReg)
- .addDef(ARM::CPSR, RegState::Dead)
- .addImm(0)
- .add(predOps(ARMCC::AL));
- } else {
- BuildMI(LoadCmpBB, DL, TII->get(ARM::MOVi), StatusReg)
- .addImm(0)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
- }
- }
MachineInstrBuilder MIB;
MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
@@ -836,10 +821,10 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
LoadCmpBB->addSuccessor(StoreBB);
// .Lstore:
- // strex rStatus, rNew, [rAddr]
- // cmp rStatus, #0
+ // strex rTempReg, rNew, [rAddr]
+ // cmp rTempReg, #0
// bne .Lloadcmp
- MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg)
+ MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg)
.addReg(NewReg)
.addReg(AddrReg);
if (StrexOp == ARM::t2STREX)
@@ -848,7 +833,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
BuildMI(StoreBB, DL, TII->get(CMPri))
- .addReg(StatusReg, getKillRegState(StatusDead))
+ .addReg(TempReg, RegState::Kill)
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(StoreBB, DL, TII->get(Bcc))
@@ -904,8 +889,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0);
- unsigned StatusReg = MI.getOperand(1).getReg();
- bool StatusDead = MI.getOperand(1).isDead();
+ unsigned TempReg = MI.getOperand(1).getReg();
// Duplicating undef operands into 2 instructions does not guarantee the same
// value on both; however, undef should be replaced by xzr anyway.
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
@@ -931,7 +915,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
// .Lloadcmp:
// ldrexd rDestLo, rDestHi, [rAddr]
// cmp rDestLo, rDesiredLo
- // sbcs rStatus<dead>, rDestHi, rDesiredHi
+ // sbcs rTempReg<dead>, rDestHi, rDesiredHi
// bne .Ldone
unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
MachineInstrBuilder MIB;
@@ -959,17 +943,17 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
LoadCmpBB->addSuccessor(StoreBB);
// .Lstore:
- // strexd rStatus, rNewLo, rNewHi, [rAddr]
- // cmp rStatus, #0
+ // strexd rTempReg, rNewLo, rNewHi, [rAddr]
+ // cmp rTempReg, #0
// bne .Lloadcmp
unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
- MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg);
+ MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
BuildMI(StoreBB, DL, TII->get(CMPri))
- .addReg(StatusReg, getKillRegState(StatusDead))
+ .addReg(TempReg, RegState::Kill)
.addImm(0)
.add(predOps(ARMCC::AL));
BuildMI(StoreBB, DL, TII->get(Bcc))
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index d06b7d0896f16..7206083a70791 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -6053,21 +6053,21 @@ def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
// significantly more naive than the standard expansion: we conservatively
// assume seq_cst, strong cmpxchg and omit clrex on failure.
-let Constraints = "@earlyclobber $Rd,@earlyclobber $status",
+let Constraints = "@earlyclobber $Rd,@earlyclobber $temp",
mayLoad = 1, mayStore = 1 in {
-def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
(ins GPR:$addr, GPR:$desired, GPR:$new),
NoItinerary, []>, Sched<[]>;
-def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
(ins GPR:$addr, GPR:$desired, GPR:$new),
NoItinerary, []>, Sched<[]>;
-def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$temp),
(ins GPR:$addr, GPR:$desired, GPR:$new),
NoItinerary, []>, Sched<[]>;
-def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status),
+def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp),
(ins GPR:$addr, GPRPair:$desired, GPRPair:$new),
NoItinerary, []>, Sched<[]>;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7563bffd8f873..1e73122cdc388 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -419,6 +419,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
}
+
+ // Custom action for SELECT MMX and expand action for SELECT_CC MMX
+ setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
+
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
@@ -1383,7 +1388,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// (result) is 256-bit but the source is 512-bit wide.
// 128-bit was made Custom under AVX1.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
- MVT::v8f32, MVT::v4f64 })
+ MVT::v8f32, MVT::v4f64, MVT::v1i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1,
MVT::v16i1, MVT::v32i1, MVT::v64i1 })
@@ -14570,6 +14575,21 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
MVT ResVT = Op.getSimpleValueType();
+  // When v1i1 is legal, a scalarization of a vselect with a vXi1 Cond
+  // would result in: v1i1 = extract_subvector(vXi1, idx).
+  // Lower these into extract_vector_elt, which is already selectable.
+ if (ResVT == MVT::v1i1) {
+ assert(Subtarget.hasAVX512() &&
+ "Boolean EXTRACT_SUBVECTOR requires AVX512");
+
+ MVT EltVT = ResVT.getVectorElementType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MVT LegalVT =
+ (TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT();
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res);
+ }
+
assert((In.getSimpleValueType().is256BitVector() ||
In.getSimpleValueType().is512BitVector()) &&
"Can only extract from 256-bit or 512-bit vectors");
@@ -20651,8 +20671,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
}
// ADC/ADCX/SBB
case ADX: {
- SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
- SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other);
+ SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
+ SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::i32);
SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2),
DAG.getConstant(-1, dl, MVT::i8));
SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3),
@@ -30663,6 +30683,14 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return SDValue(N, 0);
}
+ // Custom action for SELECT MMX
+ if (VT == MVT::x86mmx) {
+ LHS = DAG.getBitcast(MVT::i64, LHS);
+ RHS = DAG.getBitcast(MVT::i64, RHS);
+ SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::i64, Cond, LHS, RHS);
+ return DAG.getBitcast(VT, newSelect);
+ }
+
return SDValue();
}
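Since there is no native conditional move for the x86mmx type, the combine performs the select in i64 (bitcasting both operands) and bitcasts the result back, a form the rest of the pipeline already knows how to lower.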
@@ -33358,7 +33386,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
- SDValue NewChain = NewLd.getValue(1);
+    // Make sure the new load is placed in the same chain order.
+ SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
if (TokenFactorIndex >= 0) {
Ops.push_back(NewChain);
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
@@ -33379,11 +33408,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
Ld->getPointerInfo().getWithOffset(4),
MinAlign(Ld->getAlignment(), 4),
Ld->getMemOperand()->getFlags());
+    // Make sure the new loads are placed in the same chain order.
+ SDValue NewChain = DAG.makeEquivalentMemoryOrdering(Ld, LoLd);
+ NewChain = DAG.makeEquivalentMemoryOrdering(Ld, HiLd);
- SDValue NewChain = LoLd.getValue(1);
if (TokenFactorIndex >= 0) {
- Ops.push_back(LoLd);
- Ops.push_back(HiLd);
+ Ops.push_back(NewChain);
NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
}
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 705d0f7a5cf7d..0e654a380e7c5 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -978,6 +978,44 @@ multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _,
(_.VT (OpNode SrcRC:$src))>, T8PD, EVEX;
}
+multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name,
+ X86VectorVTInfo _, SDPatternOperator OpNode,
+ RegisterClass SrcRC, SubRegIndex Subreg> {
+ let ExeDomain = _.ExeDomain in
+ defm r : AVX512_maskable_custom<opc, MRMSrcReg,
+ (outs _.RC:$dst), (ins GR32:$src),
+ !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
+ !con((ins _.KRCWM:$mask), (ins GR32:$src)),
+ "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
+ "$src0 = $dst">, T8PD, EVEX;
+
+ def : Pat <(_.VT (OpNode SrcRC:$src)),
+ (!cast<Instruction>(Name#r)
+ (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
+
+ def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
+ (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
+ (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
+
+ def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
+ (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
+ (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
+}
+
+multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
+ AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
+ RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, _.info512, OpNode, SrcRC,
+ Subreg>, EVEX_V512;
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, _.info256, OpNode,
+ SrcRC, Subreg>, EVEX_V256;
+ defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, _.info128, OpNode,
+ SrcRC, Subreg>, EVEX_V128;
+ }
+}
+
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
SDPatternOperator OpNode,
RegisterClass SrcRC, Predicate prd> {
@@ -989,18 +1027,11 @@ multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
}
}
-let isCodeGenOnly = 1 in {
-defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
- X86VBroadcast, GR8, HasBWI>;
-defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
- X86VBroadcast, GR16, HasBWI>;
-}
-let isAsmParserOnly = 1 in {
- defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
- null_frag, GR32, HasBWI>;
- defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
- null_frag, GR32, HasBWI>;
-}
+defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
+ avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
+defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
+ avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
+ HasBWI>;
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
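The new multiclass folds the old isCodeGenOnly GR8/GR16 variants and the isAsmParserOnly GR32 aliases into one definition: the instruction itself takes GR32, and the added patterns adapt GR8/GR16 sources via INSERT_SUBREG, so the separate _Alt definitions are no longer needed.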
diff --git a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
index a7de793060746..fc15dc1e6032c 100644
--- a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
+++ b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
@@ -60,11 +60,13 @@ std::vector<std::unique_ptr<MemoryBuffer>> OwningMBs;
// Opens a file. Path has to be resolved already.
// Newly created memory buffers are owned by this driver.
-MemoryBufferRef openFile(StringRef Path) {
+Optional<MemoryBufferRef> openFile(StringRef Path) {
ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MB = MemoryBuffer::getFile(Path);
- if (std::error_code EC = MB.getError())
+ if (std::error_code EC = MB.getError()) {
llvm::errs() << "fail openFile: " << EC.message() << "\n";
+ return None;
+ }
MemoryBufferRef MBRef = MB.get()->getMemBufferRef();
OwningMBs.push_back(std::move(MB.get())); // take ownership
@@ -114,11 +116,16 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
for (auto *Arg : Args.filtered(OPT_UNKNOWN))
llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
- MemoryBufferRef MB;
- if (auto *Arg = Args.getLastArg(OPT_d))
- MB = openFile(Arg->getValue());
+ if (!Args.hasArg(OPT_d)) {
+ llvm::errs() << "no definition file specified\n";
+ return 1;
+ }
+
+ Optional<MemoryBufferRef> MB = openFile(Args.getLastArg(OPT_d)->getValue());
+ if (!MB)
+ return 1;
- if (!MB.getBufferSize()) {
+ if (!MB->getBufferSize()) {
llvm::errs() << "definition file empty\n";
return 1;
}
@@ -133,7 +140,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
}
Expected<COFFModuleDefinition> Def =
- parseCOFFModuleDefinition(MB, Machine, true);
+ parseCOFFModuleDefinition(*MB, Machine, true);
if (!Def) {
llvm::errs() << "error parsing definition\n"
@@ -154,7 +161,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
if (Path.empty())
Path = getImplibPath(Def->OutputFile);
- if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine))
+ if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true))
return 1;
return 0;
}
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index a33490f6e4acf..ddc975cbed1a7 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -1470,6 +1470,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
}
i = CS.arg_begin();
+ const unsigned ShadowArgStart = Args.size();
for (unsigned n = FT->getNumParams(); n != 0; ++i, --n)
Args.push_back(DFSF.getShadow(*i));
@@ -1505,6 +1506,15 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
CustomCI->setCallingConv(CI->getCallingConv());
CustomCI->setAttributes(CI->getAttributes());
+ // Update the parameter attributes of the custom call instruction to
+ // zero extend the shadow parameters. This is required for targets
+ // which consider ShadowTy an illegal type.
+ for (unsigned n = 0; n < FT->getNumParams(); n++) {
+ const unsigned ArgNo = ShadowArgStart + n;
+ if (CustomCI->getArgOperand(ArgNo)->getType() == DFSF.DFS.ShadowTy)
+ CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
+ }
+
if (!FT->getReturnType()->isVoidTy()) {
LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca);
DFSF.setShadow(CustomCI, LabelLoad);
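The zero-extension matters because ShadowTy (a 16-bit integer) is narrower than an argument register on typical targets; without the ZExt attribute, the high bits of a passed shadow value would be unspecified in the custom function's calling convention.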
diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp
index 61e8700f1cd67..2e5618686ec21 100644
--- a/lib/Transforms/Scalar/BDCE.cpp
+++ b/lib/Transforms/Scalar/BDCE.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/BDCE.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DemandedBits.h"
@@ -35,6 +36,46 @@ using namespace llvm;
STATISTIC(NumRemoved, "Number of instructions removed (unused)");
STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
+/// If an instruction is trivialized (dead), then the chain of users of that
+/// instruction may need to be cleared of assumptions that can no longer be
+/// guaranteed correct.
+static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
+ assert(I->getType()->isIntegerTy() && "Trivializing a non-integer value?");
+
+ // Initialize the worklist with eligible direct users.
+ SmallVector<Instruction *, 16> WorkList;
+ for (User *JU : I->users()) {
+ // If all bits of a user are demanded, then we know that nothing below that
+ // in the def-use chain needs to be changed.
+ auto *J = dyn_cast<Instruction>(JU);
+ if (J && !DB.getDemandedBits(J).isAllOnesValue())
+ WorkList.push_back(J);
+ }
+
+ // DFS through subsequent users while tracking visits to avoid cycles.
+ SmallPtrSet<Instruction *, 16> Visited;
+ while (!WorkList.empty()) {
+ Instruction *J = WorkList.pop_back_val();
+
+ // NSW, NUW, and exact are based on operands that might have changed.
+ J->dropPoisonGeneratingFlags();
+
+ // We do not have to worry about llvm.assume or range metadata:
+ // 1. llvm.assume demands its operand, so trivializing can't change it.
+ // 2. range metadata only applies to memory accesses which demand all bits.
+
+ Visited.insert(J);
+
+ for (User *KU : J->users()) {
+ // If all bits of a user are demanded, then we know that nothing below
+ // that in the def-use chain needs to be changed.
+ auto *K = dyn_cast<Instruction>(KU);
+ if (K && !Visited.count(K) && !DB.getDemandedBits(K).isAllOnesValue())
+ WorkList.push_back(K);
+ }
+ }
+}
+
static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
SmallVector<Instruction*, 128> Worklist;
bool Changed = false;
@@ -51,6 +92,9 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
// replacing all uses with something else. Then, if they don't need to
// remain live (because they have side effects, etc.) we can remove them.
DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n");
+
+ clearAssumptionsOfUsers(&I, DB);
+
// FIXME: In theory we could substitute undef here instead of zero.
// This should be reconsidered once we settle on the semantics of
// undef, poison, etc.
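The flag-clearing above is what keeps this sound: nsw, nuw, and exact are claims about the operands' full values, and trivialization forces dead bits to zero, so a flag that held for the original value may now be violated. A violated flag produces poison, which would contaminate even the still-demanded bits, hence every transitive user with dead bits has its flags dropped.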