about | summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/MC/MCParser/AsmParser.cpp 16
-rw-r--r-- lib/Support/Unix/Path.inc 6
-rw-r--r-- lib/Support/Unix/Process.inc 5
-rw-r--r-- lib/Target/AMDGPU/AMDGPU.h 2
-rw-r--r-- lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp 64
-rw-r--r-- lib/Target/AMDGPU/AMDGPUAliasAnalysis.h 2
-rw-r--r-- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 6
-rw-r--r-- lib/Target/ARM/ARMFrameLowering.cpp 1
-rw-r--r-- lib/Target/ARM/ARMInstrFormats.td 1
-rw-r--r-- lib/Target/ARM/ARMInstrThumb2.td 4
-rw-r--r-- lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h 4
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.cpp 5
-rw-r--r-- lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp 9
-rw-r--r-- lib/Target/X86/AsmParser/X86AsmParser.cpp 4
-rw-r--r-- lib/Transforms/Scalar/LoopSink.cpp 8
-rw-r--r-- lib/Transforms/Scalar/SROA.cpp 42
-rw-r--r-- lib/Transforms/Utils/CloneFunction.cpp 23
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp 7
18 files changed, 152 insertions, 57 deletions
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 501a1cccf60e..d88c6f76826f 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -3348,17 +3348,17 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
}
}
- // In case there is a -g option as well as debug info from directive .file,
- // we turn off the -g option, directly use the existing debug info instead.
- // Also reset any implicit ".file 0" for the assembler source.
- if (Ctx.getGenDwarfForAssembly()) {
- Ctx.getMCDwarfLineTable(0).resetRootFile();
- Ctx.setGenDwarfForAssembly(false);
- }
-
if (FileNumber == -1)
getStreamer().EmitFileDirective(Filename);
else {
+ // In case there is a -g option as well as debug info from directive .file,
+ // we turn off the -g option, directly use the existing debug info instead.
+ // Also reset any implicit ".file 0" for the assembler source.
+ if (Ctx.getGenDwarfForAssembly()) {
+ Ctx.getMCDwarfLineTable(0).resetRootFile();
+ Ctx.setGenDwarfForAssembly(false);
+ }
+
MD5::MD5Result *CKMem = nullptr;
if (HasMD5) {
CKMem = (MD5::MD5Result *)Ctx.allocate(sizeof(MD5::MD5Result), 1);
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 7ad57d892ff1..b4279d4fcc0c 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -769,8 +769,10 @@ std::error_code openFile(const Twine &Name, int &ResultFD,
SmallString<128> Storage;
StringRef P = Name.toNullTerminatedStringRef(Storage);
- if ((ResultFD = sys::RetryAfterSignal(-1, ::open, P.begin(), OpenFlags, Mode)) <
- 0)
+ // Call ::open in a lambda to avoid overload resolution in RetryAfterSignal
+ // when open is overloaded, such as in Bionic.
+ auto Open = [&]() { return ::open(P.begin(), OpenFlags, Mode); };
+ if ((ResultFD = sys::RetryAfterSignal(-1, Open)) < 0)
return std::error_code(errno, std::generic_category());
#ifndef O_CLOEXEC
if (!(Flags & OF_ChildInherit)) {
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index fa515d44f3f2..3185f45a3a61 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -211,7 +211,10 @@ std::error_code Process::FixupStandardFileDescriptors() {
assert(errno == EBADF && "expected errno to have EBADF at this point!");
if (NullFD < 0) {
- if ((NullFD = RetryAfterSignal(-1, ::open, "/dev/null", O_RDWR)) < 0)
+ // Call ::open in a lambda to avoid overload resolution in
+ // RetryAfterSignal when open is overloaded, such as in Bionic.
+ auto Open = [&]() { return ::open("/dev/null", O_RDWR); };
+ if ((NullFD = RetryAfterSignal(-1, Open)) < 0)
return std::error_code(errno, std::generic_category());
}
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 796766d94622..2b49c2ea88e1 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -229,7 +229,7 @@ struct AMDGPUAS {
enum : unsigned {
// The maximum value for flat, generic, local, private, constant and region.
- MAX_COMMON_ADDRESS = 5,
+ MAX_AMDGPU_ADDRESS = 6,
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index ef4b69d09d9f..974fbcb87191 100644
--- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -50,47 +50,51 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_)
: Arch(Arch_), AS(AS_) {
// These arrays are indexed by address space value
- // enum elements 0 ... to 5
- static const AliasResult ASAliasRulesPrivIsZero[6][6] = {
- /* Private Global Constant Group Flat Region*/
- /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
- /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias, NoAlias},
- /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias},
- /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
- /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
- /* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}
+ // enum elements 0 ... to 6
+ static const AliasResult ASAliasRulesPrivIsZero[7][7] = {
+ /* Private Global Constant Group Flat Region Constant 32-bit */
+ /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
+ /* Global */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
+ /* Constant */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
+ /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias , NoAlias},
+ /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
+ /* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
+ /* Constant 32-bit */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias}
};
- static const AliasResult ASAliasRulesGenIsZero[6][6] = {
- /* Flat Global Region Group Constant Private */
- /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
- /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
- /* Constant */ {MayAlias, NoAlias , MayAlias, NoAlias , NoAlias, NoAlias},
- /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
- /* Region */ {MayAlias, NoAlias , NoAlias , NoAlias, MayAlias, NoAlias},
- /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias}
+ static const AliasResult ASAliasRulesGenIsZero[7][7] = {
+ /* Flat Global Region Group Constant Private Constant 32-bit */
+ /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
+ /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
+ /* Region */ {MayAlias, NoAlias , NoAlias , NoAlias, MayAlias, NoAlias , MayAlias},
+ /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias},
+ /* Constant */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias, NoAlias , MayAlias},
+ /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
+ /* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias}
};
- assert(AS.MAX_COMMON_ADDRESS <= 5);
+ static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range");
if (AS.FLAT_ADDRESS == 0) {
- assert(AS.GLOBAL_ADDRESS == 1 &&
- AS.REGION_ADDRESS == 2 &&
- AS.LOCAL_ADDRESS == 3 &&
- AS.CONSTANT_ADDRESS == 4 &&
- AS.PRIVATE_ADDRESS == 5);
+ assert(AS.GLOBAL_ADDRESS == 1 &&
+ AS.REGION_ADDRESS == 2 &&
+ AS.LOCAL_ADDRESS == 3 &&
+ AS.CONSTANT_ADDRESS == 4 &&
+ AS.PRIVATE_ADDRESS == 5 &&
+ AS.CONSTANT_ADDRESS_32BIT == 6);
ASAliasRules = &ASAliasRulesGenIsZero;
} else {
- assert(AS.PRIVATE_ADDRESS == 0 &&
- AS.GLOBAL_ADDRESS == 1 &&
- AS.CONSTANT_ADDRESS == 2 &&
- AS.LOCAL_ADDRESS == 3 &&
- AS.FLAT_ADDRESS == 4 &&
- AS.REGION_ADDRESS == 5);
+ assert(AS.PRIVATE_ADDRESS == 0 &&
+ AS.GLOBAL_ADDRESS == 1 &&
+ AS.CONSTANT_ADDRESS == 2 &&
+ AS.LOCAL_ADDRESS == 3 &&
+ AS.FLAT_ADDRESS == 4 &&
+ AS.REGION_ADDRESS == 5 &&
+ AS.CONSTANT_ADDRESS_32BIT == 6);
ASAliasRules = &ASAliasRulesPrivIsZero;
}
}
AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
unsigned AS2) const {
- if (AS1 > AS.MAX_COMMON_ADDRESS || AS2 > AS.MAX_COMMON_ADDRESS) {
+ if (AS1 > AS.MAX_AMDGPU_ADDRESS || AS2 > AS.MAX_AMDGPU_ADDRESS) {
if (Arch == Triple::amdgcn)
report_fatal_error("Pointer address space out of range");
return AS1 == AS2 ? MayAlias : NoAlias;
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index 645a38af753c..09ad51d5e42f 100644
--- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -63,7 +63,7 @@ private:
private:
Triple::ArchType Arch;
AMDGPUAS AS;
- const AliasResult (*ASAliasRules)[6][6];
+ const AliasResult (*ASAliasRules)[7][7];
} ASAliasRules;
};
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index f25f4d4693ea..7cb0e12a6809 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1451,7 +1451,11 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
SDValue &Offset, bool &Imm) const {
SDLoc SL(Addr);
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ // A 32-bit (address + offset) should not cause unsigned 32-bit integer
+ // wraparound, because s_load instructions perform the addition in 64 bits.
+ if ((Addr.getValueType() != MVT::i32 ||
+ Addr->getFlags().hasNoUnsignedWrap()) &&
+ CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index a8c75702d7b5..56ad7a0f0446 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1514,6 +1514,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
break;
case ARMII::AddrMode5:
case ARMII::AddrModeT2_i8s4:
+ case ARMII::AddrModeT2_ldrex:
Limit = std::min(Limit, ((1U << 8) - 1) * 4);
break;
case ARMII::AddrModeT2_i12:
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 70aded247f65..1d3b1414f090 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -109,6 +109,7 @@ def AddrModeT2_pc : AddrMode<14>;
def AddrModeT2_i8s4 : AddrMode<15>;
def AddrMode_i12 : AddrMode<16>;
def AddrMode5FP16 : AddrMode<17>;
+def AddrModeT2_ldrex : AddrMode<18>;
// Load / store index mode.
class IndexMode<bits<2> val> {
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index c7133b6483ef..f67075fbf9fd 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -3267,7 +3267,7 @@ def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
[(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>,
Requires<[IsThumb, HasV8MBaseline]>;
def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
- AddrModeNone, 4, NoItinerary,
+ AddrModeT2_ldrex, 4, NoItinerary,
"ldrex", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>,
Requires<[IsThumb, HasV8MBaseline]> {
@@ -3346,7 +3346,7 @@ def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd),
def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
t2addrmode_imm0_1020s4:$addr),
- AddrModeNone, 4, NoItinerary,
+ AddrModeT2_ldrex, 4, NoItinerary,
"strex", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd,
(strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index b918006fe9e3..beeb5dec4baf 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -201,7 +201,8 @@ namespace ARMII {
AddrModeT2_pc = 14, // +/- i12 for pc relative data
AddrModeT2_i8s4 = 15, // i8 * 4
AddrMode_i12 = 16,
- AddrMode5FP16 = 17 // i8 * 2
+ AddrMode5FP16 = 17, // i8 * 2
+ AddrModeT2_ldrex = 18, // i8 * 4, with unscaled offset in MCInst
};
inline static const char *AddrModeToString(AddrMode addrmode) {
@@ -224,6 +225,7 @@ namespace ARMII {
case AddrModeT2_pc: return "AddrModeT2_pc";
case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
case AddrMode_i12: return "AddrMode_i12";
+ case AddrModeT2_ldrex:return "AddrModeT2_ldrex";
}
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index d5f0ba9ee485..1a91a7030657 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -621,6 +621,11 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// MCInst operand expects already scaled value.
Scale = 1;
assert((Offset & 3) == 0 && "Can't encode this offset!");
+ } else if (AddrMode == ARMII::AddrModeT2_ldrex) {
+ Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
+ NumBits = 8; // 8 bits scaled by 4
+ Scale = 4;
+ assert((Offset & 3) == 0 && "Can't encode this offset!");
} else {
llvm_unreachable("Unsupported addressing mode!");
}
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 6c255e9ef780..1822d8688fa2 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -10,6 +10,8 @@
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/EndianStream.h"
@@ -71,7 +73,12 @@ void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
bool IsResolved,
const MCSubtargetInfo *STI) const {
if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
- assert(Value == 0);
+ if (Value) {
+ MCContext &Ctx = Asm.getContext();
+ Ctx.reportError(Fixup.getLoc(),
+ "Unsupported relocation: try to compile with -O2 or above, "
+ "or check your static variable usage");
+ }
} else if (Fixup.getKind() == FK_Data_4) {
support::endian::write<uint32_t>(&Data[Fixup.getOffset()], Value, Endian);
} else if (Fixup.getKind() == FK_Data_8) {
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index b02e4d80fbba..8b7b250e1a09 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1054,7 +1054,7 @@ static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
// RIP/EIP-relative addressing is only supported in 64-bit mode.
if (!Is64BitMode && BaseReg != 0 &&
(BaseReg == X86::RIP || BaseReg == X86::EIP)) {
- ErrMsg = "RIP-relative addressing requires 64-bit mode";
+ ErrMsg = "IP-relative addressing requires 64-bit mode";
return true;
}
@@ -1099,7 +1099,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
// checked.
// FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
// REX prefix.
- if (RegNo == X86::RIZ || RegNo == X86::RIP || RegNo == X86::EIP ||
+ if (RegNo == X86::RIZ || RegNo == X86::RIP ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
X86II::isX86_64ExtendedReg(RegNo))
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index 760177c9c5e9..7d62349d4719 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -152,6 +152,14 @@ findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
}
}
+ // Can't sink into blocks that have no valid insertion point.
+ for (BasicBlock *BB : BBsToSinkInto) {
+ if (BB->getFirstInsertionPt() == BB->end()) {
+ BBsToSinkInto.clear();
+ break;
+ }
+ }
+
// If the total frequency of BBsToSinkInto is larger than preheader frequency,
// do not sink.
if (adjustedSumFreq(BBsToSinkInto, BFI) >
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index de16b608f752..bf482bf5272e 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -3046,6 +3046,42 @@ private:
return true;
}
+ void fixLoadStoreAlign(Instruction &Root) {
+ // This algorithm implements the same visitor loop as
+ // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
+ // or store found.
+ SmallPtrSet<Instruction *, 4> Visited;
+ SmallVector<Instruction *, 4> Uses;
+ Visited.insert(&Root);
+ Uses.push_back(&Root);
+ do {
+ Instruction *I = Uses.pop_back_val();
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ unsigned LoadAlign = LI->getAlignment();
+ if (!LoadAlign)
+ LoadAlign = DL.getABITypeAlignment(LI->getType());
+ LI->setAlignment(std::min(LoadAlign, getSliceAlign()));
+ continue;
+ }
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ unsigned StoreAlign = SI->getAlignment();
+ if (!StoreAlign) {
+ Value *Op = SI->getOperand(0);
+ StoreAlign = DL.getABITypeAlignment(Op->getType());
+ }
+ SI->setAlignment(std::min(StoreAlign, getSliceAlign()));
+ continue;
+ }
+
+ assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I));
+ for (User *U : I->users())
+ if (Visited.insert(cast<Instruction>(U)).second)
+ Uses.push_back(cast<Instruction>(U));
+ } while (!Uses.empty());
+ }
+
bool visitPHINode(PHINode &PN) {
LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
@@ -3069,6 +3105,9 @@ private:
LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
deleteIfTriviallyDead(OldPtr);
+ // Fix the alignment of any loads or stores using this PHI node.
+ fixLoadStoreAlign(PN);
+
// PHIs can't be promoted on their own, but often can be speculated. We
// check the speculation outside of the rewriter so that we see the
// fully-rewritten alloca.
@@ -3093,6 +3132,9 @@ private:
LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
+ // Fix the alignment of any loads or stores using this select.
+ fixLoadStoreAlign(SI);
+
// Selects can't be promoted on their own, but often can be speculated. We
// check the speculation outside of the rewriter so that we see the
// fully-rewritten alloca.
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 807360340055..9ae60962a631 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -636,6 +636,22 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
Function::iterator I = Begin;
while (I != NewFunc->end()) {
+ // We need to simplify conditional branches and switches with a constant
+ // operand. We try to prune these out when cloning, but if the
+ // simplification required looking through PHI nodes, those are only
+ // available after forming the full basic block. That may leave some here,
+ // and we still want to prune the dead code as early as possible.
+ //
+ // Do the folding before we check if the block is dead since we want code
+ // like
+ // bb:
+ // br i1 undef, label %bb, label %bb
+ // to be simplified to
+ // bb:
+ // br label %bb
+ // before we call I->getSinglePredecessor().
+ ConstantFoldTerminator(&*I);
+
// Check if this block has become dead during inlining or other
// simplifications. Note that the first block will appear dead, as it has
// not yet been wired up properly.
@@ -646,13 +662,6 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
continue;
}
- // We need to simplify conditional branches and switches with a constant
- // operand. We try to prune these out when cloning, but if the
- // simplification required looking through PHI nodes, those are only
- // available after forming the full basic block. That may leave some here,
- // and we still want to prune the dead code as early as possible.
- ConstantFoldTerminator(&*I);
-
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) { ++I; continue; }
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 859d0c92ca5a..1c7d0a63a5ca 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4510,6 +4510,13 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
for (auto OV : I->operand_values()) {
if (isOutOfScope(OV))
continue;
+ // First order recurrence Phi's should typically be considered
+ // non-uniform.
+ auto *OP = dyn_cast<PHINode>(OV);
+ if (OP && Legal->isFirstOrderRecurrence(OP))
+ continue;
+ // If all the users of the operand are uniform, then add the
+ // operand into the uniform worklist.
auto *OI = cast<Instruction>(OV);
if (llvm::all_of(OI->users(), [&](User *U) -> bool {
auto *J = cast<Instruction>(U);