author     Dimitry Andric <dim@FreeBSD.org>  2017-01-22 16:52:30 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-01-22 16:52:30 +0000
commit     7c71d32ab52480cb7bfd9f951450060263a5b9e7 (patch)
tree       c9e92208269d0251cd61fb3e34aad15ea21d7fbc
parent     581a6d8501ff5614297da837b81ed3b6956361ea (diff)
download   src-7c71d32ab52480cb7bfd9f951450060263a5b9e7.tar.gz
           src-7c71d32ab52480cb7bfd9f951450060263a5b9e7.zip
Vendor import of llvm release_40 branch r292732 (tag: vendor/llvm/llvm-release_40-r292732)
Notes:
    svn path=/vendor/llvm/dist/; revision=312625
    svn path=/vendor/llvm/llvm-release_40-r292732/; revision=312626; tag=vendor/llvm/llvm-release_40-r292732
-rwxr-xr-x  cmake/modules/AddLLVM.cmake  8
-rw-r--r--  docs/ReleaseNotes.rst  37
-rw-r--r--  docs/index.rst  5
-rw-r--r--  include/llvm/Analysis/AssumptionCache.h  5
-rw-r--r--  lib/Analysis/AssumptionCache.cpp  27
-rw-r--r--  lib/Analysis/ModuleSummaryAnalysis.cpp  1
-rw-r--r--  lib/Bitcode/Reader/MetadataLoader.cpp  21
-rw-r--r--  lib/LTO/ThinLTOCodeGenerator.cpp  34
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp  32
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp  3
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp  6
-rw-r--r--  lib/Transforms/Scalar/NewGVN.cpp  129
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp  7
-rw-r--r--  test/CodeGen/X86/atomic-eflags-reuse.ll  64
-rw-r--r--  test/CodeGen/X86/slow-pmulld.ll  3
-rw-r--r--  test/ThinLTO/X86/lazyload_metadata.ll  6
-rw-r--r--  test/Transforms/LoopStrengthReduce/pr31627.ll  58
-rw-r--r--  test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll  56
-rw-r--r--  test/Transforms/NewGVN/pr31613.ll  135
19 files changed, 462 insertions, 175 deletions
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index fbe790b05b1a..b3c7746c480a 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -462,11 +462,9 @@ function(llvm_add_library name)
if(UNIX AND NOT APPLE AND NOT ARG_SONAME)
set_target_properties(${name}
PROPERTIES
- # Concatenate the version numbers since ldconfig expects exactly
- # one component indicating the ABI version, while LLVM uses
- # major+minor for that.
- SOVERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR}
- VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX})
+ # Since 4.0.0, the ABI version is indicated by the major version
+ SOVERSION ${LLVM_VERSION_MAJOR}
+ VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX})
endif()
endif()
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index b92527dbb296..bc5aca521179 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -67,13 +67,46 @@ Non-comprehensive list of changes in this release
Makes programs 10x faster by doing Special New Thing.
+ Improvements to ThinLTO (-flto=thin)
+ ------------------------------------
+ * Integration with profile data (PGO). When available, profile data
+ enables more accurate function importing decisions, as well as
+ cross-module indirect call promotion.
+ * Significant build-time and binary-size improvements when compiling with
+ debug info (-g).
+
Changes to the LLVM IR
----------------------
-Changes to the ARM Backend
+Changes to the ARM Targets
--------------------------
- During this release ...
+**During this release the AArch64 target has:**
+
+* Gained support for ILP32 relocations.
+* Gained support for XRay.
+* Made even more progress on GlobalISel. There is still some work left before
+ it is production-ready though.
+* Refined the support for Qualcomm's Falkor and Samsung's Exynos CPUs.
+* Learned a few new tricks for lowering multiplications by constants, folding
+ spilled/refilled copies etc.
+
+**During this release the ARM target has:**
+
+* Gained support for ROPI (read-only position independence) and RWPI
+ (read-write position independence), which can be used to remove the need for
+ a dynamic linker.
+* Gained support for execute-only code, which is placed in pages without read
+ permissions.
+* Gained a machine scheduler for Cortex-R52.
+* Gained support for XRay.
+* Gained Thumb1 implementations for several compiler-rt builtins. It also
+ has some support for building the builtins for HF targets.
+* Started using the generic bitreverse intrinsic instead of rbit.
+* Gained very basic support for GlobalISel.
+
+A lot of work has also been done in LLD for ARM, which now supports more
+relocations and TLS.
Changes to the MIPS Target
diff --git a/docs/index.rst b/docs/index.rst
index 341a9c16325b..83fc73387945 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,11 +1,6 @@
Overview
========
-.. warning::
-
- If you are using a released version of LLVM, see `the download page
- <http://llvm.org/releases/>`_ to find your documentation.
-
The LLVM compiler infrastructure supports a wide range of projects, from
industrial strength compilers to specialized JIT applications to small
research projects.
diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h
index b50545a0484b..79287ed76f2e 100644
--- a/include/llvm/Analysis/AssumptionCache.h
+++ b/include/llvm/Analysis/AssumptionCache.h
@@ -68,7 +68,10 @@ class AssumptionCache {
AffectedValuesMap AffectedValues;
/// Get the vector of assumptions which affect a value from the cache.
- SmallVector<WeakVH, 1> &getAffectedValues(Value *V);
+ SmallVector<WeakVH, 1> &getOrInsertAffectedValues(Value *V);
+
+ /// Copy affected values in the cache for OV to be affected values for NV.
+ void copyAffectedValuesInCache(Value *OV, Value *NV);
/// \brief Flag tracking whether we have scanned the function yet.
///
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
index aa55d79b761e..5851594700a4 100644
--- a/lib/Analysis/AssumptionCache.cpp
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -24,7 +24,7 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-SmallVector<WeakVH, 1> &AssumptionCache::getAffectedValues(Value *V) {
+SmallVector<WeakVH, 1> &AssumptionCache::getOrInsertAffectedValues(Value *V) {
// Try using find_as first to avoid creating extra value handles just for the
// purpose of doing the lookup.
auto AVI = AffectedValues.find_as(V);
@@ -98,7 +98,7 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) {
}
for (auto &AV : Affected) {
- auto &AVV = getAffectedValues(AV);
+ auto &AVV = getOrInsertAffectedValues(AV);
if (std::find(AVV.begin(), AVV.end(), CI) == AVV.end())
AVV.push_back(CI);
}
@@ -111,20 +111,27 @@ void AssumptionCache::AffectedValueCallbackVH::deleted() {
// 'this' now dangles!
}
+void AssumptionCache::copyAffectedValuesInCache(Value *OV, Value *NV) {
+ auto &NAVV = getOrInsertAffectedValues(NV);
+ auto AVI = AffectedValues.find(OV);
+ if (AVI == AffectedValues.end())
+ return;
+
+ for (auto &A : AVI->second)
+ if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end())
+ NAVV.push_back(A);
+}
+
void AssumptionCache::AffectedValueCallbackVH::allUsesReplacedWith(Value *NV) {
if (!isa<Instruction>(NV) && !isa<Argument>(NV))
return;
// Any assumptions that affected this value now affect the new value.
- auto &NAVV = AC->getAffectedValues(NV);
- auto AVI = AC->AffectedValues.find(getValPtr());
- if (AVI == AC->AffectedValues.end())
- return;
-
- for (auto &A : AVI->second)
- if (std::find(NAVV.begin(), NAVV.end(), A) == NAVV.end())
- NAVV.push_back(A);
+ AC->copyAffectedValuesInCache(getValPtr(), NV);
+ // 'this' now might dangle! If the AffectedValues map was resized to add an
+ // entry for NV then this object might have been destroyed in favor of some
+ // copy in the grown map.
}
void AssumptionCache::scanFunction() {
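
The AssumptionCache hunk above funnels the RAUW callback through a single helper and then returns immediately, because inserting into the affected-values DenseMap can grow the table and destroy the callback object that is currently running ("'this' now might dangle"). A minimal standalone sketch of that call shape, with illustrative names and std::map standing in for LLVM's DenseMap:

    #include <map>
    #include <vector>

    struct AffectedCache {
      // Keyed container of cached entries. In LLVM this is a DenseMap whose
      // growth can relocate the callback handles stored inside it, which is
      // why the real callback does nothing after the helper call returns.
      std::map<int, std::vector<int>> Affected;

      std::vector<int> &getOrInsert(int Key) { return Affected[Key]; }

      // Merge everything recorded for OldKey into NewKey's list in one call.
      void copyAffected(int OldKey, int NewKey) {
        auto &NewList = getOrInsert(NewKey); // may create a new slot
        auto It = Affected.find(OldKey);
        if (It == Affected.end())
          return;
        for (int A : It->second)
          NewList.push_back(A);
      }
    };
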
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 6387bb36166e..f5ba637e58e2 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -405,6 +405,7 @@ char ModuleSummaryIndexWrapperPass::ID = 0;
INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
"Module Summary Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
"Module Summary Analysis", false, true)
diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp
index 4a5d18e2db75..b05ab4b1da85 100644
--- a/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -768,13 +768,12 @@ void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata(
unsigned ID, PlaceholderQueue &Placeholders) {
assert(ID < (MDStringRef.size()) + GlobalMetadataBitPosIndex.size());
assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString");
-#ifndef NDEBUG
// Lookup first if the metadata hasn't already been loaded.
if (auto *MD = MetadataList.lookup(ID)) {
auto *N = dyn_cast_or_null<MDNode>(MD);
- assert(N && N->isTemporary() && "Lazy loading an already loaded metadata");
+ if (!N->isTemporary())
+ return;
}
-#endif
SmallVector<uint64_t, 64> Record;
StringRef Blob;
IndexCursor.JumpToBit(GlobalMetadataBitPosIndex[ID - MDStringRef.size()]);
@@ -827,8 +826,22 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
auto getMD = [&](unsigned ID) -> Metadata * {
if (ID < MDStringRef.size())
return lazyLoadOneMDString(ID);
- if (!IsDistinct)
+ if (!IsDistinct) {
+ if (auto *MD = MetadataList.lookup(ID))
+ return MD;
+ // If lazy-loading is enabled, we try recursively to load the operand
+ // instead of creating a temporary.
+ if (ID < (MDStringRef.size() + GlobalMetadataBitPosIndex.size())) {
+ // Create a temporary for the node that is referencing the operand we
+ // will lazy-load. It is needed before recursing in case there are
+ // uniquing cycles.
+ MetadataList.getMetadataFwdRef(NextMetadataNo);
+ lazyLoadOneMetadata(ID, Placeholders);
+ return MetadataList.lookup(ID);
+ }
+ // Return a temporary.
return MetadataList.getMetadataFwdRef(ID);
+ }
if (auto *MD = MetadataList.getMetadataIfResolved(ID))
return MD;
return &Placeholders.getPlaceholderOp(ID);
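
The MetadataLoader hunk above lets getMD recursively lazy-load an operand, but only after installing a forward reference for the record currently being parsed, so uniquing cycles resolve against that placeholder instead of recursing forever. A small self-contained sketch of the idea, using illustrative types rather than the bitcode reader's API:

    #include <cstddef>
    #include <memory>
    #include <unordered_map>
    #include <vector>

    struct Node {
      std::vector<Node *> Operands;
    };

    struct LazyLoader {
      std::vector<std::vector<std::size_t>> Index;  // operand IDs per record
      std::vector<std::unique_ptr<Node>> Pool;      // owns every materialized node
      std::unordered_map<std::size_t, Node *> Loaded;

      Node *load(std::size_t ID) {
        auto It = Loaded.find(ID);
        if (It != Loaded.end())
          return It->second; // fully loaded, or the placeholder of a node in progress
        Pool.push_back(std::make_unique<Node>());
        Node *N = Loaded[ID] = Pool.back().get(); // register before recursing
        for (std::size_t Op : Index[ID])
          N->Operands.push_back(load(Op)); // a cycle back to ID resolves to N itself
        return N;
      }
    };
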
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index a14b86179d6e..104fb199da08 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -829,11 +829,22 @@ static std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
// Main entry point for the ThinLTO processing
void ThinLTOCodeGenerator::run() {
+ // Prepare the resulting object vector
+ assert(ProducedBinaries.empty() && "The generator should not be reused");
+ if (SavedObjectsDirectoryPath.empty())
+ ProducedBinaries.resize(Modules.size());
+ else {
+ sys::fs::create_directories(SavedObjectsDirectoryPath);
+ bool IsDir;
+ sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
+ if (!IsDir)
+ report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
+ ProducedBinaryFiles.resize(Modules.size());
+ }
+
if (CodeGenOnly) {
// Perform only parallel codegen and return.
ThreadPool Pool;
- assert(ProducedBinaries.empty() && "The generator should not be reused");
- ProducedBinaries.resize(Modules.size());
int count = 0;
for (auto &ModuleBuffer : Modules) {
Pool.async([&](int count) {
@@ -845,7 +856,12 @@ void ThinLTOCodeGenerator::run() {
/*IsImporting*/ false);
// CodeGen
- ProducedBinaries[count] = codegen(*TheModule);
+ auto OutputBuffer = codegen(*TheModule);
+ if (SavedObjectsDirectoryPath.empty())
+ ProducedBinaries[count] = std::move(OutputBuffer);
+ else
+ ProducedBinaryFiles[count] = writeGeneratedObject(
+ count, "", SavedObjectsDirectoryPath, *OutputBuffer);
}, count++);
}
@@ -866,18 +882,6 @@ void ThinLTOCodeGenerator::run() {
WriteIndexToFile(*Index, OS);
}
- // Prepare the resulting object vector
- assert(ProducedBinaries.empty() && "The generator should not be reused");
- if (SavedObjectsDirectoryPath.empty())
- ProducedBinaries.resize(Modules.size());
- else {
- sys::fs::create_directories(SavedObjectsDirectoryPath);
- bool IsDir;
- sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
- if (!IsDir)
- report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
- ProducedBinaryFiles.resize(Modules.size());
- }
// Prepare the module map.
auto ModuleMap = generateModuleMap(Modules);
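
The ThinLTOCodeGenerator hunk above hoists output preparation (resizing ProducedBinaries, or creating and validating SavedObjectsDirectoryPath) to the top of run(), so the early CodeGenOnly path can also save objects to disk rather than only filling the in-memory vector. A rough sketch of that structure, with illustrative names and the actual file writing elided:

    #include <cstddef>
    #include <string>
    #include <utility>
    #include <vector>

    struct Generator {
      std::string SavedObjectsDir;                  // empty => keep objects in memory
      std::vector<std::string> ProducedBinaries;    // in-memory objects
      std::vector<std::string> ProducedBinaryFiles; // paths of saved objects
      std::size_t NumModules = 0;

      // Done once, before any code generation path runs.
      void prepareOutputs() {
        if (SavedObjectsDir.empty())
          ProducedBinaries.resize(NumModules);
        else
          ProducedBinaryFiles.resize(NumModules); // directory assumed created/validated here
      }

      // Shared by the codegen-only fast path and the full ThinLTO pipeline.
      void storeOutput(std::size_t Count, std::string Object) {
        if (SavedObjectsDir.empty()) {
          ProducedBinaries[Count] = std::move(Object);
        } else {
          // Writing Object to disk is elided; only the naming scheme is sketched.
          ProducedBinaryFiles[Count] = SavedObjectsDir + "/" + std::to_string(Count) + ".o";
        }
      }
    };
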
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 787dff99367e..2f13b722eb3b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -29455,19 +29455,11 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Combine brcond/cmov/setcc/.. based on comparing the result of
-/// atomic_load_add to use EFLAGS produced by the addition
-/// directly if possible. For example:
-///
-/// (setcc (cmp (atomic_load_add x, -C) C), COND_E)
-/// becomes:
-/// (setcc (LADD x, -C), COND_E)
-///
-/// and
+/// Combine:
/// (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
-/// becomes:
+/// to:
/// (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
-///
+/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
/// Note that this is only legal for some op/cc combinations.
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
SelectionDAG &DAG) {
@@ -29482,7 +29474,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
if (!Cmp.hasOneUse())
return SDValue();
- // This applies to variations of the common case:
+ // This only applies to variations of the common case:
// (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
// (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
// (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
@@ -29501,9 +29493,8 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
return SDValue();
auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
- if (!CmpRHSC)
+ if (!CmpRHSC || CmpRHSC->getZExtValue() != 0)
return SDValue();
- APInt Comparand = CmpRHSC->getAPIntValue();
const unsigned Opc = CmpLHS.getOpcode();
@@ -29519,19 +29510,16 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
if (Opc == ISD::ATOMIC_LOAD_SUB)
Addend = -Addend;
- if (Comparand == -Addend) {
- // No change to CC.
- } else if (CC == X86::COND_S && Comparand == 0 && Addend == 1) {
+ if (CC == X86::COND_S && Addend == 1)
CC = X86::COND_LE;
- } else if (CC == X86::COND_NS && Comparand == 0 && Addend == 1) {
+ else if (CC == X86::COND_NS && Addend == 1)
CC = X86::COND_G;
- } else if (CC == X86::COND_G && Comparand == 0 && Addend == -1) {
+ else if (CC == X86::COND_G && Addend == -1)
CC = X86::COND_GE;
- } else if (CC == X86::COND_LE && Comparand == 0 && Addend == -1) {
+ else if (CC == X86::COND_LE && Addend == -1)
CC = X86::COND_L;
- } else {
+ else
return SDValue();
- }
SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
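
The rewritten combine above now only handles comparisons against zero and remaps the condition code so the EFLAGS of the LOCKed arithmetic can be reused. The underlying integer identity, shown as plain C++ rather than as the DAG combine itself:

    #include <atomic>

    // For integers, "old < 0" is the same predicate as "old + 1 <= 0", so a
    // sign test on the value returned by the atomic increment can instead test
    // the flags of the locked add itself with a "less or equal" condition.
    bool sign_was_set(std::atomic<long> &V) {
      return V.fetch_add(1) < 0;
    }
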
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 727ff70c3ff6..586bb7bd7b1a 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -232,9 +232,6 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
isTargetKFreeBSD() || In64BitMode)
stackAlignment = 16;
-
- assert((!isPMULLDSlow() || hasSSE41()) &&
- "Feature Slow PMULLD can only be set on a subtarget with SSE4.1");
}
void X86Subtarget::initializeEnvironment() {
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index a1561fc0a6c2..01728ae680de 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -3163,6 +3163,9 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
// Don't bother if the instruction is in a BB which ends in an EHPad.
if (UseBB->getTerminator()->isEHPad())
continue;
+ // Don't bother rewriting PHIs in catchswitch blocks.
+ if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))
+ continue;
// Ignore uses which are part of other SCEV expressions, to avoid
// analyzing them multiple times.
if (SE.isSCEVable(UserInst->getType())) {
@@ -4672,7 +4675,8 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
// is the canonical backedge for this loop, which complicates post-inc
// users.
if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
- !isa<IndirectBrInst>(BB->getTerminator())) {
+ !isa<IndirectBrInst>(BB->getTerminator()) &&
+ !isa<CatchSwitchInst>(BB->getTerminator())) {
BasicBlock *Parent = PN->getParent();
Loop *PNLoop = LI.getLoopFor(Parent);
if (!PNLoop || Parent != PNLoop->getHeader()) {
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index e1b6741f31b4..6043e04bb8c5 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -81,6 +81,10 @@ STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified");
STATISTIC(NumGVNPhisAllSame, "Number of PHIs whos arguments are all the same");
STATISTIC(NumGVNMaxIterations,
"Maximum Number of iterations it took to converge GVN");
+STATISTIC(NumGVNLeaderChanges, "Number of leader changes");
+STATISTIC(NumGVNSortedLeaderChanges, "Number of sorted leader changes");
+STATISTIC(NumGVNAvoidedSortedLeaderChanges,
+ "Number of avoided sorted leader changes");
//===----------------------------------------------------------------------===//
// GVN Pass
@@ -139,6 +143,10 @@ struct CongruenceClass {
// This is used so we can detect store equivalence changes properly.
int StoreCount = 0;
+ // The most dominating leader after our current leader, because the member set
+ // is not sorted and is expensive to keep sorted all the time.
+ std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U};
+
explicit CongruenceClass(unsigned ID) : ID(ID) {}
CongruenceClass(unsigned ID, Value *Leader, const Expression *E)
: ID(ID), RepLeader(Leader), DefiningExpr(E) {}
@@ -320,8 +328,8 @@ private:
// Templated to allow them to work both on BB's and BB-edges.
template <class T>
Value *lookupOperandLeader(Value *, const User *, const T &) const;
- void performCongruenceFinding(Value *, const Expression *);
- void moveValueToNewCongruenceClass(Value *, CongruenceClass *,
+ void performCongruenceFinding(Instruction *, const Expression *);
+ void moveValueToNewCongruenceClass(Instruction *, CongruenceClass *,
CongruenceClass *);
// Reachability handling.
void updateReachableEdge(BasicBlock *, BasicBlock *);
@@ -1056,20 +1064,43 @@ void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) {
// Move a value, currently in OldClass, to be part of NewClass
// Update OldClass for the move (including changing leaders, etc)
-void NewGVN::moveValueToNewCongruenceClass(Value *V, CongruenceClass *OldClass,
+void NewGVN::moveValueToNewCongruenceClass(Instruction *I,
+ CongruenceClass *OldClass,
CongruenceClass *NewClass) {
- DEBUG(dbgs() << "New congruence class for " << V << " is " << NewClass->ID
+ DEBUG(dbgs() << "New congruence class for " << I << " is " << NewClass->ID
<< "\n");
- OldClass->Members.erase(V);
- NewClass->Members.insert(V);
- if (isa<StoreInst>(V)) {
+
+ if (I == OldClass->NextLeader.first)
+ OldClass->NextLeader = {nullptr, ~0U};
+
+ // The new instruction and new class leader may either be siblings in the
+ // dominator tree, or the new class leader should dominate the new member
+ // instruction. We simply check that the member instruction does not properly
+ // dominate the new class leader.
+ assert(
+ !isa<Instruction>(NewClass->RepLeader) || !NewClass->RepLeader ||
+ I == NewClass->RepLeader ||
+ !DT->properlyDominates(
+ I->getParent(),
+ cast<Instruction>(NewClass->RepLeader)->getParent()) &&
+ "New class for instruction should not be dominated by instruction");
+
+ if (NewClass->RepLeader != I) {
+ auto DFSNum = InstrDFS.lookup(I);
+ if (DFSNum < NewClass->NextLeader.second)
+ NewClass->NextLeader = {I, DFSNum};
+ }
+
+ OldClass->Members.erase(I);
+ NewClass->Members.insert(I);
+ if (isa<StoreInst>(I)) {
--OldClass->StoreCount;
assert(OldClass->StoreCount >= 0);
++NewClass->StoreCount;
assert(NewClass->StoreCount > 0);
}
- ValueToClass[V] = NewClass;
+ ValueToClass[I] = NewClass;
// See if we destroyed the class or need to swap leaders.
if (OldClass->Members.empty() && OldClass != InitialClass) {
if (OldClass->DefiningExpr) {
@@ -1078,25 +1109,48 @@ void NewGVN::moveValueToNewCongruenceClass(Value *V, CongruenceClass *OldClass,
<< " from table\n");
ExpressionToClass.erase(OldClass->DefiningExpr);
}
- } else if (OldClass->RepLeader == V) {
+ } else if (OldClass->RepLeader == I) {
// When the leader changes, the value numbering of
// everything may change due to symbolization changes, so we need to
// reprocess.
- OldClass->RepLeader = *(OldClass->Members.begin());
+ DEBUG(dbgs() << "Leader change!\n");
+ ++NumGVNLeaderChanges;
+ // We don't need to sort members if there is only 1, and we don't care about
+ // sorting the initial class because everything either gets out of it or is
+ // unreachable.
+ if (OldClass->Members.size() == 1 || OldClass == InitialClass) {
+ OldClass->RepLeader = *(OldClass->Members.begin());
+ } else if (OldClass->NextLeader.first) {
+ ++NumGVNAvoidedSortedLeaderChanges;
+ OldClass->RepLeader = OldClass->NextLeader.first;
+ OldClass->NextLeader = {nullptr, ~0U};
+ } else {
+ ++NumGVNSortedLeaderChanges;
+ // TODO: If this ends up to slow, we can maintain a dual structure for
+ // member testing/insertion, or keep things mostly sorted, and sort only
+ // here, or ....
+ std::pair<Value *, unsigned> MinDFS = {nullptr, ~0U};
+ for (const auto X : OldClass->Members) {
+ auto DFSNum = InstrDFS.lookup(X);
+ if (DFSNum < MinDFS.second)
+ MinDFS = {X, DFSNum};
+ }
+ OldClass->RepLeader = MinDFS.first;
+ }
markLeaderChangeTouched(OldClass);
}
}
// Perform congruence finding on a given value numbering expression.
-void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
- ValueToExpression[V] = E;
+void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
+ ValueToExpression[I] = E;
// This is guaranteed to return something, since it will at least find
// INITIAL.
- CongruenceClass *VClass = ValueToClass[V];
- assert(VClass && "Should have found a vclass");
+ CongruenceClass *IClass = ValueToClass[I];
+ assert(IClass && "Should have found a IClass");
// Dead classes should have been eliminated from the mapping.
- assert(!VClass->Dead && "Found a dead class");
+ assert(!IClass->Dead && "Found a dead class");
CongruenceClass *EClass;
if (const auto *VE = dyn_cast<VariableExpression>(E)) {
@@ -1118,13 +1172,13 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
NewClass->RepLeader =
lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
} else {
- NewClass->RepLeader = V;
+ NewClass->RepLeader = I;
}
assert(!isa<VariableExpression>(E) &&
"VariableExpression should have been handled already");
EClass = NewClass;
- DEBUG(dbgs() << "Created new congruence class for " << *V
+ DEBUG(dbgs() << "Created new congruence class for " << *I
<< " using expression " << *E << " at " << NewClass->ID
<< " and leader " << *(NewClass->RepLeader) << "\n");
DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n");
@@ -1140,36 +1194,31 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
assert(!EClass->Dead && "We accidentally looked up a dead class");
}
}
- bool ClassChanged = VClass != EClass;
- bool LeaderChanged = LeaderChanges.erase(V);
+ bool ClassChanged = IClass != EClass;
+ bool LeaderChanged = LeaderChanges.erase(I);
if (ClassChanged || LeaderChanged) {
DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E
<< "\n");
if (ClassChanged)
-
- moveValueToNewCongruenceClass(V, VClass, EClass);
-
-
- markUsersTouched(V);
- if (auto *I = dyn_cast<Instruction>(V)) {
- if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
- // If this is a MemoryDef, we need to update the equivalence table. If
- // we determined the expression is congruent to a different memory
- // state, use that different memory state. If we determined it didn't,
- // we update that as well. Right now, we only support store
- // expressions.
- if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
- EClass->Members.size() != 1) {
- auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
- setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
- } else {
- setMemoryAccessEquivTo(MA, nullptr);
- }
- markMemoryUsersTouched(MA);
+ moveValueToNewCongruenceClass(I, IClass, EClass);
+ markUsersTouched(I);
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
+ // If this is a MemoryDef, we need to update the equivalence table. If
+ // we determined the expression is congruent to a different memory
+ // state, use that different memory state. If we determined it didn't,
+ // we update that as well. Right now, we only support store
+ // expressions.
+ if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
+ EClass->Members.size() != 1) {
+ auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
+ setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
+ } else {
+ setMemoryAccessEquivTo(MA, nullptr);
}
+ markMemoryUsersTouched(MA);
}
- } else if (StoreInst *SI = dyn_cast<StoreInst>(V)) {
+ } else if (auto *SI = dyn_cast<StoreInst>(I)) {
// There is, sadly, one complicating thing for stores. Stores do not
// produce values, only consume them. However, in order to make loads and
// stores value number the same, we ignore the value operand of the store.
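
The NewGVN hunks above cache a NextLeader (the non-leader member with the smallest DFS number) so that when a class loses its leader it can usually promote a dominating member without rescanning the whole member set, which is what keeps the leader from cycling in cases like pr31613.ll further down. A standalone sketch of that bookkeeping, using illustrative types rather than NewGVN's:

    #include <set>
    #include <utility>

    // Members carry a DFS number; the class caches the smallest DFS number
    // seen among non-leader members so a leader change can usually skip a
    // full scan of the member set.
    struct Member { unsigned DFSNum; };

    struct CongruenceClassSketch {
      Member *Leader = nullptr;
      std::pair<Member *, unsigned> NextLeader = {nullptr, ~0U};
      std::set<Member *> Members;

      void add(Member *M) {
        Members.insert(M);
        if (M != Leader && M->DFSNum < NextLeader.second)
          NextLeader = {M, M->DFSNum};
      }

      void remove(Member *M) {
        Members.erase(M);
        if (M == NextLeader.first)
          NextLeader = {nullptr, ~0U};
        if (M != Leader)
          return;
        // The leader left: prefer the cached candidate, otherwise scan for the
        // member with the smallest DFS number (the most dominating one).
        if (NextLeader.first) {
          Leader = NextLeader.first;
        } else {
          std::pair<Member *, unsigned> MinDFS = {nullptr, ~0U};
          for (Member *X : Members)
            if (X->DFSNum < MinDFS.second)
              MinDFS = {X, X->DFSNum};
          Leader = MinDFS.first;
        }
        NextLeader = {nullptr, ~0U};
      }
    };
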
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1b1f86f8efdc..dac7032fa08f 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5602,6 +5602,13 @@ void LoopVectorizationLegality::collectLoopUniforms() {
// is consecutive-like, the pointer operand should remain uniform.
else if (hasConsecutiveLikePtrOperand(&I))
ConsecutiveLikePtrs.insert(Ptr);
+
+ // Otherwise, if the memory instruction will be vectorized and its
+ // pointer operand is non-consecutive-like, the memory instruction should
+ // be a gather or scatter operation. Its pointer operand will be
+ // non-uniform.
+ else
+ PossibleNonUniformPtrs.insert(Ptr);
}
// Add to the Worklist all consecutive and consecutive-like pointers that
diff --git a/test/CodeGen/X86/atomic-eflags-reuse.ll b/test/CodeGen/X86/atomic-eflags-reuse.ll
index 9902325fd148..9521a2afefcd 100644
--- a/test/CodeGen/X86/atomic-eflags-reuse.ll
+++ b/test/CodeGen/X86/atomic-eflags-reuse.ll
@@ -192,68 +192,4 @@ entry:
ret i8 %s2
}
-define i8 @test_sub_1_setcc_eq(i64* %p) #0 {
-; CHECK-LABEL: test_sub_1_setcc_eq:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: lock decq (%rdi)
-; CHECK-NEXT: sete %al
-; CHECK-NEXT: retq
-entry:
- %tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
- %tmp1 = icmp eq i64 %tmp0, 1
- %tmp2 = zext i1 %tmp1 to i8
- ret i8 %tmp2
-}
-
-define i8 @test_add_5_setcc_ne(i64* %p) #0 {
-; CHECK-LABEL: test_add_5_setcc_ne:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: lock addq $5, (%rdi)
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: retq
-entry:
- %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
- %tmp1 = icmp ne i64 %tmp0, -5
- %tmp2 = zext i1 %tmp1 to i8
- ret i8 %tmp2
-}
-
-define i8 @test_add_5_setcc_ne_comparand_mismatch(i64* %p) #0 {
-; CHECK-LABEL: test_add_5_setcc_ne_comparand_mismatch:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movl $5, %eax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: setne %al
-; CHECK-NEXT: retq
-entry:
- %tmp0 = atomicrmw add i64* %p, i64 5 seq_cst
- %tmp1 = icmp ne i64 %tmp0, 0
- %tmp2 = zext i1 %tmp1 to i8
- ret i8 %tmp2
-}
-
-declare void @g()
-define zeroext i1 @test_sub_1_setcc_jcc(i64* %p) local_unnamed_addr #0 {
-; TODO: It's possible to use "lock dec" here, but both uses of the cmp need to
-; be updated.
-; CHECK-LABEL: test_sub_1_setcc_jcc:
-; CHECK: # BB#0: # %entry
-; CHECK: movq $-1, %rax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: cmpq $1, %rax
-; CHECK-NEXT: sete %bl
-; CHECK-NEXT: jne
-entry:
- %add = atomicrmw volatile add i64* %p, i64 -1 seq_cst
- %cmp = icmp ne i64 %add, 1
- %not = xor i1 %cmp, true
- br i1 %cmp, label %else, label %then
-then:
- tail call void @g()
- br label %else
-else:
- ret i1 %not
-}
-
attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/slow-pmulld.ll b/test/CodeGen/X86/slow-pmulld.ll
index ff6682090a26..1de19d2334d4 100644
--- a/test/CodeGen/X86/slow-pmulld.ll
+++ b/test/CodeGen/X86/slow-pmulld.ll
@@ -4,6 +4,9 @@
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE4-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE4-64
+; Make sure that the slow-pmulld feature can be used without SSE4.1.
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont -mattr=-sse4.1
+
define <4 x i32> @foo(<4 x i8> %A) {
; CHECK32-LABEL: foo:
; CHECK32: # BB#0:
diff --git a/test/ThinLTO/X86/lazyload_metadata.ll b/test/ThinLTO/X86/lazyload_metadata.ll
index 3c4345831aa3..7bd3e641bc77 100644
--- a/test/ThinLTO/X86/lazyload_metadata.ll
+++ b/test/ThinLTO/X86/lazyload_metadata.ll
@@ -17,7 +17,7 @@
; RUN: -o /dev/null -disable-ondemand-mds-loading -stats \
; RUN: 2>&1 | FileCheck %s -check-prefix=NOTLAZY
; NOTLAZY: 58 bitcode-reader - Number of Metadata records loaded
-; NOTLAZY: 8 bitcode-reader - Number of MDStrings loaded
+; NOTLAZY: 6 bitcode-reader - Number of MDStrings loaded
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@@ -48,7 +48,7 @@ define void @globalfunc3(i32 %arg) {
!3 = !{!"3"}
!4 = !{!"4"}
!5 = !{!"5"}
-!6 = !{!"6"}
+!6 = !{!9}
!7 = !{!"7"}
!8 = !{!"8"}
-!9 = !{!"9"}
+!9 = !{!6}
diff --git a/test/Transforms/LoopStrengthReduce/pr31627.ll b/test/Transforms/LoopStrengthReduce/pr31627.ll
new file mode 100644
index 000000000000..4bd4fc273d7b
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr31627.ll
@@ -0,0 +1,58 @@
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.24215"
+
+define void @fn3() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %call = invoke i32 @fn2()
+ to label %for.cond.preheader unwind label %catch.dispatch2
+
+for.cond.preheader: ; preds = %entry
+ br label %for.cond
+
+for.cond: ; preds = %for.cond.preheader, %for.cond
+ %b.0 = phi i32 [ %inc, %for.cond ], [ %call, %for.cond.preheader ]
+ %inc = add nsw i32 %b.0, 1
+ invoke void @fn1(i32 %inc)
+ to label %for.cond unwind label %catch.dispatch
+
+; CHECK: %[[add:.*]] = add i32 %call, 1
+; CHECK: br label %for.cond
+
+; CHECK: for.cond: ; preds = %for.cond, %for.cond.preheader
+; CHECK: %[[lsr_iv:.*]] = phi i32 [ %lsr.iv.next, %for.cond ], [ %[[add]], %for.cond.preheader ]
+; CHECK: %[[lsr_iv_next:.*]] = add i32 %lsr.iv, 1
+; CHECK: invoke void @fn1(i32 %[[lsr_iv]])
+
+
+catch.dispatch: ; preds = %for.cond
+ %0 = catchswitch within none [label %catch] unwind label %catch.dispatch2
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [i8* null, i32 64, i8* null]
+ invoke void @_CxxThrowException(i8* null, i8* null) #2 [ "funclet"(token %1) ]
+ to label %unreachable unwind label %catch.dispatch2
+
+catch.dispatch2: ; preds = %catch.dispatch, %catch, %entry
+ %a.0 = phi i32 [ undef, %entry ], [ %call, %catch ], [ %call, %catch.dispatch ]
+ %2 = catchswitch within none [label %catch3] unwind to caller
+
+catch3: ; preds = %catch.dispatch2
+ %3 = catchpad within %2 [i8* null, i32 64, i8* null]
+ call void @fn1(i32 %a.0) [ "funclet"(token %3) ]
+ catchret from %3 to label %try.cont4
+
+try.cont4: ; preds = %catch3
+ ret void
+
+unreachable: ; preds = %catch
+ unreachable
+}
+
+declare i32 @fn2()
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @fn1(i32)
+
+declare void @_CxxThrowException(i8*, i8*)
diff --git a/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
new file mode 100644
index 000000000000..32bfcd2275ac
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
@@ -0,0 +1,56 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -instcombine -S -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: PR31671
+;
+; Check a pointer in which one of its uses is consecutive-like and another of
+; its uses is non-consecutive-like. In the test case below, %tmp3 is the
+; pointer operand of an interleaved load, making it consecutive-like. However,
+; it is also the pointer operand of a non-interleaved store that will become a
+; scatter operation. %tmp3 (and the induction variable) should not be marked
+; uniform-after-vectorization.
+;
+; CHECK: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %i
+; CHECK-NOT: LV: Found uniform instruction: %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i
+; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+; CHECK-NOT: LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 5
+; CHECK: vector.body:
+; CHECK: %vec.ind = phi <16 x i64>
+; CHECK: %[[T0:.+]] = extractelement <16 x i64> %vec.ind, i32 0
+; CHECK: %[[T1:.+]] = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %[[T0]]
+; CHECK: %[[T2:.+]] = bitcast float* %[[T1]] to <80 x float>*
+; CHECK: load <80 x float>, <80 x float>* %[[T2]], align 4
+; CHECK: %[[T3:.+]] = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %[[T0]]
+; CHECK: %[[T4:.+]] = bitcast float* %[[T3]] to <80 x float>*
+; CHECK: load <80 x float>, <80 x float>* %[[T4]], align 4
+; CHECK: %VectorGep = getelementptr inbounds %data, %data* %d, i64 0, i32 0, <16 x i64> %vec.ind
+; CHECK: call void @llvm.masked.scatter.v16f32({{.*}}, <16 x float*> %VectorGep, {{.*}})
+; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
+
+%data = type { [32000 x float], [3 x i32], [4 x i8], [32000 x float] }
+
+define void @PR31671(float %x, %data* %d) #0 {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+ %tmp0 = getelementptr inbounds %data, %data* %d, i64 0, i32 3, i64 %i
+ %tmp1 = load float, float* %tmp0, align 4
+ %tmp2 = fmul float %x, %tmp1
+ %tmp3 = getelementptr inbounds %data, %data* %d, i64 0, i32 0, i64 %i
+ %tmp4 = load float, float* %tmp3, align 4
+ %tmp5 = fadd float %tmp4, %tmp2
+ store float %tmp5, float* %tmp3, align 4
+ %i.next = add nuw nsw i64 %i, 5
+ %cond = icmp slt i64 %i.next, 32000
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+attributes #0 = { "target-cpu"="knl" }
diff --git a/test/Transforms/NewGVN/pr31613.ll b/test/Transforms/NewGVN/pr31613.ll
new file mode 100644
index 000000000000..d3a41830c789
--- /dev/null
+++ b/test/Transforms/NewGVN/pr31613.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+;; Both of these tests are tests of phi nodes that end up all equivalent to each other
+;; Without proper leader ordering, we will end up cycling the leader between all of them and never converge.
+
+define void @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 1, [[BB18:%.*]] ]
+; CHECK-NEXT: br label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB4:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: br i1 undef, label [[BB18]], label [[BB7:%.*]]
+; CHECK: bb7:
+; CHECK-NEXT: br label [[BB9:%.*]]
+; CHECK: bb9:
+; CHECK-NEXT: br i1 undef, label [[BB2]], label [[BB11:%.*]]
+; CHECK: bb11:
+; CHECK-NEXT: br i1 undef, label [[BB16:%.*]], label [[BB14:%.*]]
+; CHECK: bb14:
+; CHECK-NEXT: br label [[BB4]]
+; CHECK: bb16:
+; CHECK-NEXT: br label [[BB7]]
+; CHECK: bb18:
+; CHECK-NEXT: br label [[BB1]]
+;
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb18, %bb
+ %tmp = phi i32 [ 0, %bb ], [ 1, %bb18 ]
+ br label %bb2
+
+bb2: ; preds = %bb9, %bb1
+ %tmp3 = phi i32 [ %tmp, %bb1 ], [ %tmp8, %bb9 ]
+ br label %bb4
+
+bb4: ; preds = %bb14, %bb2
+ %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp15, %bb14 ]
+ br i1 undef, label %bb18, label %bb7
+
+bb7: ; preds = %bb16, %bb4
+ %tmp8 = phi i32 [ %tmp17, %bb16 ], [ %tmp5, %bb4 ]
+ br label %bb9
+
+bb9: ; preds = %bb7
+ br i1 undef, label %bb2, label %bb11
+
+bb11: ; preds = %bb9
+ br i1 undef, label %bb16, label %bb14
+
+bb14: ; preds = %bb11
+ %tmp15 = phi i32 [ %tmp8, %bb11 ]
+ br label %bb4
+
+bb16: ; preds = %bb11
+ %tmp17 = phi i32 [ %tmp8, %bb11 ]
+ br label %bb7
+
+bb18: ; preds = %bb4
+ br label %bb1
+}
+
+%struct.a = type {}
+%struct.b = type {}
+
+declare void @c.d.p(i64, i8*)
+
+define void @e() {
+; CHECK-LABEL: @e(
+; CHECK-NEXT: [[F:%.*]] = alloca i32
+; CHECK-NEXT: store i32 undef, i32* [[F]], !g !0
+; CHECK-NEXT: br label [[H:%.*]]
+; CHECK: h:
+; CHECK-NEXT: call void @c.d.p(i64 8, i8* undef)
+; CHECK-NEXT: [[I:%.*]] = load i32, i32* [[F]]
+; CHECK-NEXT: [[J:%.*]] = load i32, i32* null
+; CHECK-NEXT: [[K:%.*]] = icmp eq i32 [[I]], [[J]]
+; CHECK-NEXT: br i1 [[K]], label [[L:%.*]], label [[Q:%.*]]
+; CHECK: l:
+; CHECK-NEXT: br label [[R:%.*]]
+; CHECK: q:
+; CHECK-NEXT: [[M:%.*]] = load %struct.a*, %struct.a** null
+; CHECK-NEXT: br label [[R]]
+; CHECK: r:
+; CHECK-NEXT: switch i32 undef, label [[N:%.*]] [
+; CHECK-NEXT: i32 0, label [[S:%.*]]
+; CHECK-NEXT: ]
+; CHECK: s:
+; CHECK-NEXT: store i32 undef, i32* [[F]], !g !0
+; CHECK-NEXT: br label [[H]]
+; CHECK: n:
+; CHECK-NEXT: [[O:%.*]] = load %struct.a*, %struct.a** null
+; CHECK-NEXT: ret void
+;
+ %f = alloca i32
+ store i32 undef, i32* %f, !g !0
+ br label %h
+
+h: ; preds = %s, %0
+ call void @c.d.p(i64 8, i8* undef)
+ %i = load i32, i32* %f
+ %j = load i32, i32* null
+ %k = icmp eq i32 %i, %j
+ br i1 %k, label %l, label %q
+
+l: ; preds = %h
+ br label %r
+
+q: ; preds = %h
+ %m = load %struct.a*, %struct.a** null
+ %1 = bitcast %struct.a* %m to %struct.b*
+ br label %r
+
+r: ; preds = %q, %l
+ switch i32 undef, label %n [
+ i32 0, label %s
+ ]
+
+s: ; preds = %r
+ store i32 undef, i32* %f, !g !0
+ br label %h
+
+n: ; preds = %r
+ %o = load %struct.a*, %struct.a** null
+ %2 = bitcast %struct.a* %o to %struct.b*
+ ret void
+}
+
+!0 = !{}