| field | value |
|---|---|
| author | Dimitry Andric <dim@FreeBSD.org>, 2018-09-11 10:09:45 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org>, 2018-09-11 10:09:45 +0000 |
| commit | 36272db3cad448211389168cced4baac39a1a0d1 (patch) |
| tree | e570502f0d6730e432657fc86304fa02a2de80fa /lib |
| parent | 8568f9cb5af587ccee4088af3e2d617b3c30d403 (diff) |
Diffstat (limited to 'lib')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | lib/MC/MCParser/AsmParser.cpp | 16 |
| -rw-r--r-- | lib/Support/Unix/Path.inc | 6 |
| -rw-r--r-- | lib/Support/Unix/Process.inc | 5 |
| -rw-r--r-- | lib/Target/AMDGPU/AMDGPU.h | 2 |
| -rw-r--r-- | lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp | 64 |
| -rw-r--r-- | lib/Target/AMDGPU/AMDGPUAliasAnalysis.h | 2 |
| -rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 6 |
| -rw-r--r-- | lib/Target/ARM/ARMFrameLowering.cpp | 1 |
| -rw-r--r-- | lib/Target/ARM/ARMInstrFormats.td | 1 |
| -rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 4 |
| -rw-r--r-- | lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 4 |
| -rw-r--r-- | lib/Target/ARM/Thumb2InstrInfo.cpp | 5 |
| -rw-r--r-- | lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 9 |
| -rw-r--r-- | lib/Target/X86/AsmParser/X86AsmParser.cpp | 4 |
| -rw-r--r-- | lib/Transforms/Scalar/LoopSink.cpp | 8 |
| -rw-r--r-- | lib/Transforms/Scalar/SROA.cpp | 42 |
| -rw-r--r-- | lib/Transforms/Utils/CloneFunction.cpp | 23 |
| -rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 7 |
18 files changed, 152 insertions, 57 deletions
```diff
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 501a1cccf60e..d88c6f76826f 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -3348,17 +3348,17 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
     }
   }
 
-  // In case there is a -g option as well as debug info from directive .file,
-  // we turn off the -g option, directly use the existing debug info instead.
-  // Also reset any implicit ".file 0" for the assembler source.
-  if (Ctx.getGenDwarfForAssembly()) {
-    Ctx.getMCDwarfLineTable(0).resetRootFile();
-    Ctx.setGenDwarfForAssembly(false);
-  }
-
   if (FileNumber == -1)
     getStreamer().EmitFileDirective(Filename);
   else {
+    // In case there is a -g option as well as debug info from directive .file,
+    // we turn off the -g option, directly use the existing debug info instead.
+    // Also reset any implicit ".file 0" for the assembler source.
+    if (Ctx.getGenDwarfForAssembly()) {
+      Ctx.getMCDwarfLineTable(0).resetRootFile();
+      Ctx.setGenDwarfForAssembly(false);
+    }
+
     MD5::MD5Result *CKMem = nullptr;
     if (HasMD5) {
       CKMem = (MD5::MD5Result *)Ctx.allocate(sizeof(MD5::MD5Result), 1);
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 7ad57d892ff1..b4279d4fcc0c 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -769,8 +769,10 @@ std::error_code openFile(const Twine &Name, int &ResultFD,
   SmallString<128> Storage;
   StringRef P = Name.toNullTerminatedStringRef(Storage);
 
-  if ((ResultFD = sys::RetryAfterSignal(-1, ::open, P.begin(), OpenFlags, Mode)) <
-      0)
+  // Call ::open in a lambda to avoid overload resolution in RetryAfterSignal
+  // when open is overloaded, such as in Bionic.
+  auto Open = [&]() { return ::open(P.begin(), OpenFlags, Mode); };
+  if ((ResultFD = sys::RetryAfterSignal(-1, Open)) < 0)
     return std::error_code(errno, std::generic_category());
 #ifndef O_CLOEXEC
   if (!(Flags & OF_ChildInherit)) {
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index fa515d44f3f2..3185f45a3a61 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -211,7 +211,10 @@ std::error_code Process::FixupStandardFileDescriptors() {
     assert(errno == EBADF && "expected errno to have EBADF at this point!");
 
     if (NullFD < 0) {
-      if ((NullFD = RetryAfterSignal(-1, ::open, "/dev/null", O_RDWR)) < 0)
+      // Call ::open in a lambda to avoid overload resolution in
+      // RetryAfterSignal when open is overloaded, such as in Bionic.
+      auto Open = [&]() { return ::open("/dev/null", O_RDWR); };
+      if ((NullFD = RetryAfterSignal(-1, Open)) < 0)
         return std::error_code(errno, std::generic_category());
     }
 
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 796766d94622..2b49c2ea88e1 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -229,7 +229,7 @@ struct AMDGPUAS {
   enum : unsigned {
     // The maximum value for flat, generic, local, private, constant and region.
-    MAX_COMMON_ADDRESS = 5,
+    MAX_AMDGPU_ADDRESS = 6,
 
     GLOBAL_ADDRESS = 1,   ///< Address space for global memory (RAT0, VTX0).
     CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index ef4b69d09d9f..974fbcb87191 100644
--- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -50,47 +50,51 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
 AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_,
     Triple::ArchType Arch_) : Arch(Arch_), AS(AS_) {
   // These arrarys are indexed by address space value
-  // enum elements 0 ... to 5
-  static const AliasResult ASAliasRulesPrivIsZero[6][6] = {
-  /*             Private    Global    Constant  Group     Flat      Region */
-  /* Private  */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
-  /* Global   */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias, NoAlias},
-  /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias},
-  /* Group    */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
-  /* Flat     */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
-  /* Region   */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}
+  // enum elements 0 ... to 6
+  static const AliasResult ASAliasRulesPrivIsZero[7][7] = {
+  /*                    Private    Global    Constant  Group     Flat      Region    Constant 32-bit */
+  /* Private         */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
+  /* Global          */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
+  /* Constant        */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias},
+  /* Group           */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias , NoAlias},
+  /* Flat            */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
+  /* Region          */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
+  /* Constant 32-bit */ {NoAlias , MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , MayAlias}
   };
-  static const AliasResult ASAliasRulesGenIsZero[6][6] = {
-  /*             Flat       Global    Region    Group     Constant  Private */
-  /* Flat     */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
-  /* Global   */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
-  /* Constant */ {MayAlias, NoAlias , MayAlias, NoAlias , NoAlias,  NoAlias},
-  /* Group    */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
-  /* Region   */ {MayAlias, NoAlias , NoAlias , NoAlias,  MayAlias, NoAlias},
-  /* Private  */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias}
+  static const AliasResult ASAliasRulesGenIsZero[7][7] = {
+  /*                    Flat       Global    Region    Group     Constant  Private   Constant 32-bit */
+  /* Flat            */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
+  /* Global          */ {MayAlias, MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , MayAlias},
+  /* Region          */ {MayAlias, NoAlias , NoAlias , NoAlias,  MayAlias, NoAlias , MayAlias},
+  /* Group           */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias , NoAlias},
+  /* Constant        */ {MayAlias, MayAlias, MayAlias, NoAlias , NoAlias,  NoAlias , MayAlias},
+  /* Private         */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
+  /* Constant 32-bit */ {MayAlias, MayAlias, MayAlias, NoAlias , MayAlias, NoAlias , NoAlias}
   };
-  assert(AS.MAX_COMMON_ADDRESS <= 5);
+  static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 6, "Addr space out of range");
   if (AS.FLAT_ADDRESS == 0) {
-    assert(AS.GLOBAL_ADDRESS == 1 &&
-           AS.REGION_ADDRESS == 2 &&
-           AS.LOCAL_ADDRESS == 3 &&
-           AS.CONSTANT_ADDRESS == 4 &&
-           AS.PRIVATE_ADDRESS == 5);
+    assert(AS.GLOBAL_ADDRESS == 1 &&
+           AS.REGION_ADDRESS == 2 &&
+           AS.LOCAL_ADDRESS == 3 &&
+           AS.CONSTANT_ADDRESS == 4 &&
+           AS.PRIVATE_ADDRESS == 5 &&
+           AS.CONSTANT_ADDRESS_32BIT == 6);
     ASAliasRules = &ASAliasRulesGenIsZero;
   } else {
-    assert(AS.PRIVATE_ADDRESS == 0 &&
-           AS.GLOBAL_ADDRESS == 1 &&
-           AS.CONSTANT_ADDRESS == 2 &&
-           AS.LOCAL_ADDRESS == 3 &&
-           AS.FLAT_ADDRESS == 4 &&
-           AS.REGION_ADDRESS == 5);
+    assert(AS.PRIVATE_ADDRESS == 0 &&
+           AS.GLOBAL_ADDRESS == 1 &&
+           AS.CONSTANT_ADDRESS == 2 &&
+           AS.LOCAL_ADDRESS == 3 &&
+           AS.FLAT_ADDRESS == 4 &&
+           AS.REGION_ADDRESS == 5 &&
+           AS.CONSTANT_ADDRESS_32BIT == 6);
     ASAliasRules = &ASAliasRulesPrivIsZero;
   }
 }
 
 AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
     unsigned AS2) const {
-  if (AS1 > AS.MAX_COMMON_ADDRESS || AS2 > AS.MAX_COMMON_ADDRESS) {
+  if (AS1 > AS.MAX_AMDGPU_ADDRESS || AS2 > AS.MAX_AMDGPU_ADDRESS) {
     if (Arch == Triple::amdgcn)
       report_fatal_error("Pointer address space out of range");
     return AS1 == AS2 ? MayAlias : NoAlias;
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index 645a38af753c..09ad51d5e42f 100644
--- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -63,7 +63,7 @@ private:
   private:
     Triple::ArchType Arch;
     AMDGPUAS AS;
-    const AliasResult (*ASAliasRules)[6][6];
+    const AliasResult (*ASAliasRules)[7][7];
   } ASAliasRules;
 };
 
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index f25f4d4693ea..7cb0e12a6809 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1451,7 +1451,11 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                     SDValue &Offset, bool &Imm) const {
   SDLoc SL(Addr);
 
-  if (CurDAG->isBaseWithConstantOffset(Addr)) {
+  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
+  // wraparound, because s_load instructions perform the addition in 64 bits.
+  if ((Addr.getValueType() != MVT::i32 ||
+       Addr->getFlags().hasNoUnsignedWrap()) &&
+      CurDAG->isBaseWithConstantOffset(Addr)) {
     SDValue N0 = Addr.getOperand(0);
     SDValue N1 = Addr.getOperand(1);
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index a8c75702d7b5..56ad7a0f0446 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1514,6 +1514,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
       break;
     case ARMII::AddrMode5:
     case ARMII::AddrModeT2_i8s4:
+    case ARMII::AddrModeT2_ldrex:
       Limit = std::min(Limit, ((1U << 8) - 1) * 4);
       break;
     case ARMII::AddrModeT2_i12:
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 70aded247f65..1d3b1414f090 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -109,6 +109,7 @@ def AddrModeT2_pc    : AddrMode<14>;
 def AddrModeT2_i8s4  : AddrMode<15>;
 def AddrMode_i12     : AddrMode<16>;
 def AddrMode5FP16    : AddrMode<17>;
+def AddrModeT2_ldrex : AddrMode<18>;
 
 // Load / store index mode.
 class IndexMode<bits<2> val> {
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index c7133b6483ef..f67075fbf9fd 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -3267,7 +3267,7 @@ def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
                          [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>,
                Requires<[IsThumb, HasV8MBaseline]>;
 def t2LDREX  : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
-                       AddrModeNone, 4, NoItinerary,
+                       AddrModeT2_ldrex, 4, NoItinerary,
                        "ldrex", "\t$Rt, $addr", "",
                        [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>,
                Requires<[IsThumb, HasV8MBaseline]> {
@@ -3346,7 +3346,7 @@ def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd),
 
 def t2STREX  : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
                              t2addrmode_imm0_1020s4:$addr),
-                  AddrModeNone, 4, NoItinerary,
+                  AddrModeT2_ldrex, 4, NoItinerary,
                   "strex", "\t$Rd, $Rt, $addr", "",
                   [(set rGPR:$Rd, (strex_4 rGPR:$Rt,
                                    t2addrmode_imm0_1020s4:$addr))]>,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index b918006fe9e3..beeb5dec4baf 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -201,7 +201,8 @@ namespace ARMII {
     AddrModeT2_pc    = 14, // +/- i12 for pc relative data
     AddrModeT2_i8s4  = 15, // i8 * 4
     AddrMode_i12     = 16,
-    AddrMode5FP16    = 17  // i8 * 2
+    AddrMode5FP16    = 17, // i8 * 2
+    AddrModeT2_ldrex = 18, // i8 * 4, with unscaled offset in MCInst
   };
 
   inline static const char *AddrModeToString(AddrMode addrmode) {
@@ -224,6 +225,7 @@ namespace ARMII {
     case AddrModeT2_pc:   return "AddrModeT2_pc";
     case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
     case AddrMode_i12:    return "AddrMode_i12";
+    case AddrModeT2_ldrex:return "AddrModeT2_ldrex";
     }
   }
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index d5f0ba9ee485..1a91a7030657 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -621,6 +621,11 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
       // MCInst operand expects already scaled value.
       Scale = 1;
       assert((Offset & 3) == 0 && "Can't encode this offset!");
+    } else if (AddrMode == ARMII::AddrModeT2_ldrex) {
+      Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
+      NumBits = 8; // 8 bits scaled by 4
+      Scale = 4;
+      assert((Offset & 3) == 0 && "Can't encode this offset!");
     } else {
       llvm_unreachable("Unsupported addressing mode!");
     }
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 6c255e9ef780..1822d8688fa2 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -10,6 +10,8 @@
 #include "MCTargetDesc/BPFMCTargetDesc.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/Support/EndianStream.h"
@@ -71,7 +73,12 @@ void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                                bool IsResolved,
                                const MCSubtargetInfo *STI) const {
   if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
-    assert(Value == 0);
+    if (Value) {
+      MCContext &Ctx = Asm.getContext();
+      Ctx.reportError(Fixup.getLoc(),
+                      "Unsupported relocation: try to compile with -O2 or above, "
+                      "or check your static variable usage");
+    }
   } else if (Fixup.getKind() == FK_Data_4) {
     support::endian::write<uint32_t>(&Data[Fixup.getOffset()], Value, Endian);
   } else if (Fixup.getKind() == FK_Data_8) {
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index b02e4d80fbba..8b7b250e1a09 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1054,7 +1054,7 @@ static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
   // RIP/EIP-relative addressing is only supported in 64-bit mode.
   if (!Is64BitMode && BaseReg != 0 &&
       (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
-    ErrMsg = "RIP-relative addressing requires 64-bit mode";
+    ErrMsg = "IP-relative addressing requires 64-bit mode";
     return true;
   }
 
@@ -1099,7 +1099,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
   // checked.
   // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
   // REX prefix.
-  if (RegNo == X86::RIZ || RegNo == X86::RIP || RegNo == X86::EIP ||
+  if (RegNo == X86::RIZ || RegNo == X86::RIP ||
       X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
       X86II::isX86_64NonExtLowByteReg(RegNo) ||
       X86II::isX86_64ExtendedReg(RegNo))
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index 760177c9c5e9..7d62349d4719 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -152,6 +152,14 @@ findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
     }
   }
 
+  // Can't sink into blocks that have no valid insertion point.
+  for (BasicBlock *BB : BBsToSinkInto) {
+    if (BB->getFirstInsertionPt() == BB->end()) {
+      BBsToSinkInto.clear();
+      break;
+    }
+  }
+
   // If the total frequency of BBsToSinkInto is larger than preheader frequency,
   // do not sink.
   if (adjustedSumFreq(BBsToSinkInto, BFI) >
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index de16b608f752..bf482bf5272e 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -3046,6 +3046,42 @@ private:
     return true;
   }
 
+  void fixLoadStoreAlign(Instruction &Root) {
+    // This algorithm implements the same visitor loop as
+    // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
+    // or store found.
+    SmallPtrSet<Instruction *, 4> Visited;
+    SmallVector<Instruction *, 4> Uses;
+    Visited.insert(&Root);
+    Uses.push_back(&Root);
+    do {
+      Instruction *I = Uses.pop_back_val();
+
+      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+        unsigned LoadAlign = LI->getAlignment();
+        if (!LoadAlign)
+          LoadAlign = DL.getABITypeAlignment(LI->getType());
+        LI->setAlignment(std::min(LoadAlign, getSliceAlign()));
+        continue;
+      }
+      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+        unsigned StoreAlign = SI->getAlignment();
+        if (!StoreAlign) {
+          Value *Op = SI->getOperand(0);
+          StoreAlign = DL.getABITypeAlignment(Op->getType());
+        }
+        SI->setAlignment(std::min(StoreAlign, getSliceAlign()));
+        continue;
+      }
+
+      assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
+             isa<SelectInst>(I) || isa<GetElementPtrInst>(I));
+      for (User *U : I->users())
+        if (Visited.insert(cast<Instruction>(U)).second)
+          Uses.push_back(cast<Instruction>(U));
+    } while (!Uses.empty());
+  }
+
   bool visitPHINode(PHINode &PN) {
     LLVM_DEBUG(dbgs() << "    original: " << PN << "\n");
     assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
@@ -3069,6 +3105,9 @@ private:
     LLVM_DEBUG(dbgs() << "          to: " << PN << "\n");
     deleteIfTriviallyDead(OldPtr);
 
+    // Fix the alignment of any loads or stores using this PHI node.
+    fixLoadStoreAlign(PN);
+
    // PHIs can't be promoted on their own, but often can be speculated. We
    // check the speculation outside of the rewriter so that we see the
    // fully-rewritten alloca.
@@ -3093,6 +3132,9 @@ private:
     LLVM_DEBUG(dbgs() << "          to: " << SI << "\n");
     deleteIfTriviallyDead(OldPtr);
 
+    // Fix the alignment of any loads or stores using this select.
+    fixLoadStoreAlign(SI);
+
     // Selects can't be promoted on their own, but often can be speculated. We
     // check the speculation outside of the rewriter so that we see the
     // fully-rewritten alloca.
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 807360340055..9ae60962a631 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -636,6 +636,22 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
   Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
   Function::iterator I = Begin;
   while (I != NewFunc->end()) {
+    // We need to simplify conditional branches and switches with a constant
+    // operand. We try to prune these out when cloning, but if the
+    // simplification required looking through PHI nodes, those are only
+    // available after forming the full basic block. That may leave some here,
+    // and we still want to prune the dead code as early as possible.
+    //
+    // Do the folding before we check if the block is dead since we want code
+    // like
+    //  bb:
+    //    br i1 undef, label %bb, label %bb
+    // to be simplified to
+    //  bb:
+    //    br label %bb
+    // before we call I->getSinglePredecessor().
+    ConstantFoldTerminator(&*I);
+
     // Check if this block has become dead during inlining or other
     // simplifications. Note that the first block will appear dead, as it has
     // not yet been wired up properly.
@@ -646,13 +662,6 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
       continue;
     }
 
-    // We need to simplify conditional branches and switches with a constant
-    // operand. We try to prune these out when cloning, but if the
-    // simplification required looking through PHI nodes, those are only
-    // available after forming the full basic block. That may leave some here,
-    // and we still want to prune the dead code as early as possible.
-    ConstantFoldTerminator(&*I);
-
     BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
     if (!BI || BI->isConditional()) { ++I; continue; }
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 859d0c92ca5a..1c7d0a63a5ca 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4510,6 +4510,13 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
     for (auto OV : I->operand_values()) {
       if (isOutOfScope(OV))
         continue;
+      // First order recurrence Phi's should typically be considered
+      // non-uniform.
+      auto *OP = dyn_cast<PHINode>(OV);
+      if (OP && Legal->isFirstOrderRecurrence(OP))
+        continue;
+      // If all the users of the operand are uniform, then add the
+      // operand into the uniform worklist.
       auto *OI = cast<Instruction>(OV);
       if (llvm::all_of(OI->users(), [&](User *U) -> bool {
             auto *J = cast<Instruction>(U);
```
