summaryrefslogtreecommitdiff
path: root/test/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'test/Transforms')
-rw-r--r--test/Transforms/ADCE/basictest.ll3
-rw-r--r--test/Transforms/AddDiscriminators/basic.ll14
-rw-r--r--test/Transforms/AddDiscriminators/call.ll52
-rw-r--r--test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll30
-rw-r--r--test/Transforms/AddDiscriminators/diamond.ll72
-rw-r--r--test/Transforms/AddDiscriminators/first-only.ll14
-rw-r--r--test/Transforms/AddDiscriminators/multiple.ll12
-rw-r--r--test/Transforms/AddDiscriminators/no-discriminators.ll14
-rw-r--r--test/Transforms/AddDiscriminators/oneline.ll102
-rw-r--r--test/Transforms/ArgumentPromotion/dbg.ll12
-rw-r--r--test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll24
-rw-r--r--test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll24
-rw-r--r--test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll20
-rw-r--r--test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll82
-rw-r--r--test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll11
-rw-r--r--test/Transforms/AtomicExpand/X86/lit.local.cfg2
-rw-r--r--test/Transforms/BBVectorize/X86/wr-aliases.ll2
-rw-r--r--test/Transforms/BBVectorize/simple3.ll16
-rw-r--r--test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll9
-rw-r--r--test/Transforms/CodeGenPrepare/AArch64/free-zext.ll82
-rw-r--r--test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll95
-rw-r--r--test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll118
-rw-r--r--test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll56
-rw-r--r--test/Transforms/CodeGenPrepare/X86/select.ll141
-rw-r--r--test/Transforms/CodeGenPrepare/X86/widen_switch.ll95
-rw-r--r--test/Transforms/CodeGenPrepare/invariant.group.ll23
-rw-r--r--test/Transforms/CodeGenPrepare/statepoint-relocate.ll87
-rw-r--r--test/Transforms/ConstProp/calls.ll236
-rw-r--r--test/Transforms/ConstProp/insertvalue.ll10
-rw-r--r--test/Transforms/ConstProp/loads.ll7
-rw-r--r--test/Transforms/ConstantMerge/merge-both.ll2
-rw-r--r--test/Transforms/CorrelatedValuePropagation/non-null.ll60
-rw-r--r--test/Transforms/CorrelatedValuePropagation/range.ll24
-rw-r--r--test/Transforms/CorrelatedValuePropagation/select.ll2
-rw-r--r--test/Transforms/CrossDSOCFI/basic.ll88
-rw-r--r--test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll24
-rw-r--r--test/Transforms/DeadArgElim/aggregates.ll26
-rw-r--r--test/Transforms/DeadArgElim/dbginfo.ll15
-rw-r--r--test/Transforms/DeadArgElim/naked_functions.ll31
-rw-r--r--test/Transforms/DeadArgElim/operandbundle.ll12
-rw-r--r--test/Transforms/DeadStoreElimination/calloc-store.ll65
-rw-r--r--test/Transforms/DeadStoreElimination/inst-limits.ll8
-rw-r--r--test/Transforms/DeadStoreElimination/simple.ll147
-rw-r--r--test/Transforms/EarlyCSE/AArch64/ldstN.ll18
-rw-r--r--test/Transforms/EarlyCSE/atomics.ll259
-rw-r--r--test/Transforms/EarlyCSE/basic.ll74
-rw-r--r--test/Transforms/EarlyCSE/fence.ll86
-rw-r--r--test/Transforms/Float2Int/basic.ll10
-rw-r--r--test/Transforms/ForcedFunctionAttrs/forced.ll12
-rw-r--r--test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll5
-rw-r--r--test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll21
-rw-r--r--test/Transforms/FunctionAttrs/2010-10-30-volatile.ll4
-rw-r--r--test/Transforms/FunctionAttrs/atomic.ll4
-rw-r--r--test/Transforms/FunctionAttrs/nonnull.ll74
-rw-r--r--test/Transforms/FunctionAttrs/norecurse.ll57
-rw-r--r--test/Transforms/FunctionAttrs/optnone.ll6
-rw-r--r--test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll30
-rw-r--r--test/Transforms/FunctionAttrs/readattrs.ll38
-rw-r--r--test/Transforms/FunctionImport/Inputs/funcimport.ll87
-rw-r--r--test/Transforms/FunctionImport/Inputs/funcimport_debug.ll27
-rw-r--r--test/Transforms/FunctionImport/funcimport.ll75
-rw-r--r--test/Transforms/FunctionImport/funcimport_debug.ll45
-rw-r--r--test/Transforms/GCOVProfiling/function-numbering.ll14
-rw-r--r--test/Transforms/GCOVProfiling/global-ctor.ll8
-rw-r--r--test/Transforms/GCOVProfiling/linezero.ll18
-rw-r--r--test/Transforms/GCOVProfiling/linkagename.ll6
-rw-r--r--test/Transforms/GCOVProfiling/return-block.ll6
-rw-r--r--test/Transforms/GCOVProfiling/version.ll6
-rw-r--r--test/Transforms/GVN/2009-03-10-PREOnVoid.ll28
-rw-r--r--test/Transforms/GVN/assume-equal.ll235
-rw-r--r--test/Transforms/GVN/crash-no-aa.ll2
-rw-r--r--test/Transforms/GVN/funclet.ll44
-rw-r--r--test/Transforms/GVN/invariant-load.ll17
-rw-r--r--test/Transforms/GVN/invariant.group.ll337
-rw-r--r--test/Transforms/GVN/load-pre-nonlocal.ll4
-rw-r--r--test/Transforms/GVN/no_speculative_loads_with_asan.ll57
-rw-r--r--test/Transforms/GVN/phi-translate.ll4
-rw-r--r--test/Transforms/GVN/pr14166.ll2
-rw-r--r--test/Transforms/GVN/pr24426.ll18
-rw-r--r--test/Transforms/GVN/pr25440.ll108
-rw-r--r--test/Transforms/GVN/pre-gep-load.ll31
-rw-r--r--test/Transforms/GVN/pre-load.ll41
-rw-r--r--test/Transforms/GVN/range.ll24
-rw-r--r--test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll14
-rw-r--r--test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll2
-rw-r--r--test/Transforms/GlobalDCE/pr20981.ll4
-rw-r--r--test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll2
-rw-r--r--test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll4
-rw-r--r--test/Transforms/GlobalOpt/2009-03-05-dbg.ll8
-rw-r--r--test/Transforms/GlobalOpt/alias-resolve.ll22
-rw-r--r--test/Transforms/GlobalOpt/alias-used-address-space.ll6
-rw-r--r--test/Transforms/GlobalOpt/alias-used-section.ll2
-rw-r--r--test/Transforms/GlobalOpt/alias-used.ll16
-rw-r--r--test/Transforms/GlobalOpt/assume.ll21
-rw-r--r--test/Transforms/GlobalOpt/available_externally_global_ctors.ll22
-rw-r--r--test/Transforms/GlobalOpt/deadglobal.ll3
-rw-r--r--test/Transforms/GlobalOpt/externally-initialized-aggregate.ll50
-rw-r--r--test/Transforms/GlobalOpt/externally-initialized.ll37
-rw-r--r--test/Transforms/GlobalOpt/global-demotion.ll80
-rw-r--r--test/Transforms/GlobalOpt/invariant.group.barrier.ll79
-rw-r--r--test/Transforms/GlobalOpt/localize-constexpr.ll28
-rw-r--r--test/Transforms/GlobalOpt/metadata.ll2
-rw-r--r--test/Transforms/GlobalOpt/tls.ll1
-rw-r--r--test/Transforms/GlobalOpt/unnamed-addr.ll6
-rw-r--r--test/Transforms/IndVarSimplify/bec-cmp.ll47
-rw-r--r--test/Transforms/IndVarSimplify/const_phi.ll33
-rw-r--r--test/Transforms/IndVarSimplify/eliminate-comparison.ll348
-rw-r--r--test/Transforms/IndVarSimplify/iv-widen.ll30
-rw-r--r--test/Transforms/IndVarSimplify/loop-invariant-conditions.ll279
-rw-r--r--test/Transforms/IndVarSimplify/pr24356.ll63
-rw-r--r--test/Transforms/IndVarSimplify/pr24783.ll30
-rw-r--r--test/Transforms/IndVarSimplify/pr24804.ll25
-rw-r--r--test/Transforms/IndVarSimplify/pr24952.ll27
-rw-r--r--test/Transforms/IndVarSimplify/pr24956.ll37
-rw-r--r--test/Transforms/IndVarSimplify/pr25047.ll49
-rw-r--r--test/Transforms/IndVarSimplify/pr25051.ll44
-rw-r--r--test/Transforms/IndVarSimplify/pr25060.ll37
-rw-r--r--test/Transforms/IndVarSimplify/pr25360.ll33
-rw-r--r--test/Transforms/IndVarSimplify/pr25421.ll30
-rw-r--r--test/Transforms/IndVarSimplify/pr25578.ll45
-rw-r--r--test/Transforms/IndVarSimplify/tripcount_infinite.ll15
-rw-r--r--test/Transforms/IndVarSimplify/widen-loop-comp.ll160
-rw-r--r--test/Transforms/IndVarSimplify/zext-nuw.ll49
-rw-r--r--test/Transforms/InferFunctionAttrs/annotate.ll (renamed from test/Transforms/FunctionAttrs/annotate-1.ll)5
-rw-r--r--test/Transforms/Inline/alloca-dbgdeclare-merge.ll102
-rw-r--r--test/Transforms/Inline/alloca-dbgdeclare.ll16
-rw-r--r--test/Transforms/Inline/debug-info-duplicate-calls.ll26
-rw-r--r--test/Transforms/Inline/debug-invoke.ll4
-rw-r--r--test/Transforms/Inline/deopt-bundles.ll203
-rw-r--r--test/Transforms/Inline/ignore-debug-info.ll16
-rw-r--r--test/Transforms/Inline/inline-assume.ll31
-rw-r--r--test/Transforms/Inline/inline-cold-callee.ll39
-rw-r--r--test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll30
-rw-r--r--test/Transforms/Inline/inline-hot-callee.ll39
-rw-r--r--test/Transforms/Inline/inline-optsize.ll2
-rw-r--r--test/Transforms/Inline/inline_dbg_declare.ll26
-rw-r--r--test/Transforms/Inline/inline_invoke.ll3
-rw-r--r--test/Transforms/Inline/noalias-calls.ll19
-rw-r--r--test/Transforms/Inline/noalias-cs.ll12
-rw-r--r--test/Transforms/Inline/noalias2.ll4
-rw-r--r--test/Transforms/Inline/zero-cost.ll17
-rw-r--r--test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll2
-rw-r--r--test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll2
-rw-r--r--test/Transforms/InstCombine/LandingPadClauses.ll11
-rw-r--r--test/Transforms/InstCombine/add2.ll10
-rw-r--r--test/Transforms/InstCombine/alias-recursion.ll2
-rw-r--r--test/Transforms/InstCombine/all-bits-shift.ll46
-rw-r--r--test/Transforms/InstCombine/alloca.ll11
-rw-r--r--test/Transforms/InstCombine/and-compare.ll23
-rw-r--r--test/Transforms/InstCombine/and2.ll68
-rw-r--r--test/Transforms/InstCombine/apint-or.ll79
-rw-r--r--test/Transforms/InstCombine/apint-or1.ll36
-rw-r--r--test/Transforms/InstCombine/apint-or2.ll35
-rw-r--r--test/Transforms/InstCombine/assume-redundant.ll26
-rw-r--r--test/Transforms/InstCombine/bitcast-alias-function.ll24
-rw-r--r--test/Transforms/InstCombine/bitcast-bitcast.ll84
-rw-r--r--test/Transforms/InstCombine/bitcast-vec-canon.ll25
-rw-r--r--test/Transforms/InstCombine/bitcast.ll55
-rw-r--r--test/Transforms/InstCombine/bitreverse-fold.ll11
-rw-r--r--test/Transforms/InstCombine/bitreverse-recognize.ll114
-rw-r--r--test/Transforms/InstCombine/blend_x86.ll102
-rw-r--r--test/Transforms/InstCombine/bswap-fold.ll6
-rw-r--r--test/Transforms/InstCombine/bswap-known-bits.ll47
-rw-r--r--test/Transforms/InstCombine/bswap.ll14
-rw-r--r--test/Transforms/InstCombine/call_nonnull_arg.ll20
-rw-r--r--test/Transforms/InstCombine/cast-callee-deopt-bundles.ll11
-rw-r--r--test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll108
-rw-r--r--test/Transforms/InstCombine/cast-set.ll4
-rw-r--r--test/Transforms/InstCombine/cast.ll54
-rw-r--r--test/Transforms/InstCombine/compare-alloca.ll97
-rw-r--r--test/Transforms/InstCombine/compare-signs.ll40
-rw-r--r--test/Transforms/InstCombine/constant-fold-alias.ll4
-rw-r--r--test/Transforms/InstCombine/ctpop.ll45
-rw-r--r--test/Transforms/InstCombine/debug-line.ll6
-rw-r--r--test/Transforms/InstCombine/debuginfo.ll12
-rw-r--r--test/Transforms/InstCombine/demorgan-zext.ll34
-rw-r--r--test/Transforms/InstCombine/div.ll24
-rw-r--r--test/Transforms/InstCombine/exp2-1.ll19
-rw-r--r--test/Transforms/InstCombine/extractvalue.ll22
-rw-r--r--test/Transforms/InstCombine/fabs.ll25
-rw-r--r--test/Transforms/InstCombine/fast-math.ll141
-rw-r--r--test/Transforms/InstCombine/ffs-1.ll69
-rw-r--r--test/Transforms/InstCombine/fold-phi-load-metadata.ll69
-rw-r--r--test/Transforms/InstCombine/gc.relocate.ll39
-rw-r--r--test/Transforms/InstCombine/gepphigep.ll50
-rw-r--r--test/Transforms/InstCombine/icmp-range.ll89
-rw-r--r--test/Transforms/InstCombine/icmp-shr.ll9
-rw-r--r--test/Transforms/InstCombine/icmp.ll73
-rw-r--r--test/Transforms/InstCombine/inline-intrinsic-assert.ll2
-rw-r--r--test/Transforms/InstCombine/insert-extract-shuffle.ll47
-rw-r--r--test/Transforms/InstCombine/intrinsics.ll65
-rw-r--r--test/Transforms/InstCombine/lifetime.ll93
-rw-r--r--test/Transforms/InstCombine/load-cmp.ll7
-rw-r--r--test/Transforms/InstCombine/load-combine-metadata-2.ll20
-rw-r--r--test/Transforms/InstCombine/load-combine-metadata-3.ll20
-rw-r--r--test/Transforms/InstCombine/load-combine-metadata-4.ll20
-rw-r--r--test/Transforms/InstCombine/load-combine-metadata.ll6
-rw-r--r--test/Transforms/InstCombine/loadstore-metadata.ll38
-rw-r--r--test/Transforms/InstCombine/log-pow-nofastmath.ll30
-rw-r--r--test/Transforms/InstCombine/log-pow.ll41
-rw-r--r--test/Transforms/InstCombine/malloc-free-delete.ll11
-rw-r--r--test/Transforms/InstCombine/memcmp-1.ll53
-rw-r--r--test/Transforms/InstCombine/memset_chk-1.ll26
-rw-r--r--test/Transforms/InstCombine/minmax-fp.ll156
-rw-r--r--test/Transforms/InstCombine/neon-intrinsics.ll12
-rw-r--r--test/Transforms/InstCombine/no_cgscc_assert.ll2
-rw-r--r--test/Transforms/InstCombine/nonnull-attribute.ll19
-rw-r--r--test/Transforms/InstCombine/not.ll47
-rw-r--r--test/Transforms/InstCombine/objsize-address-space.ll2
-rw-r--r--test/Transforms/InstCombine/objsize.ll4
-rw-r--r--test/Transforms/InstCombine/or.ll2
-rw-r--r--test/Transforms/InstCombine/phi-load-metadata-2.ll30
-rw-r--r--test/Transforms/InstCombine/phi-load-metadata-3.ll30
-rw-r--r--test/Transforms/InstCombine/phi-load-metadata.ll30
-rw-r--r--test/Transforms/InstCombine/phi.ll130
-rw-r--r--test/Transforms/InstCombine/pow-1.ll2
-rw-r--r--test/Transforms/InstCombine/pow-4.ll120
-rw-r--r--test/Transforms/InstCombine/pow-exp-nofastmath.ll17
-rw-r--r--test/Transforms/InstCombine/pow-exp.ll28
-rw-r--r--test/Transforms/InstCombine/pow-exp2.ll19
-rw-r--r--test/Transforms/InstCombine/pow-sqrt.ll15
-rw-r--r--test/Transforms/InstCombine/pr20059.ll16
-rw-r--r--test/Transforms/InstCombine/pr24605.ll15
-rw-r--r--test/Transforms/InstCombine/pr25745.ll20
-rw-r--r--test/Transforms/InstCombine/shift.ll4
-rw-r--r--test/Transforms/InstCombine/sincospi.ll9
-rw-r--r--test/Transforms/InstCombine/sqrt-nofast.ll25
-rw-r--r--test/Transforms/InstCombine/statepoint.ll20
-rw-r--r--test/Transforms/InstCombine/store.ll113
-rw-r--r--test/Transforms/InstCombine/strto-1.ll2
-rw-r--r--test/Transforms/InstCombine/tan-nofastmath.ll17
-rw-r--r--test/Transforms/InstCombine/tan.ll24
-rw-r--r--test/Transforms/InstCombine/token.ll89
-rw-r--r--test/Transforms/InstCombine/trunc.ll42
-rw-r--r--test/Transforms/InstCombine/unpack-fca.ll168
-rw-r--r--test/Transforms/InstCombine/vec_demanded_elts.ll359
-rw-r--r--test/Transforms/InstCombine/vec_shuffle.ll27
-rw-r--r--test/Transforms/InstCombine/vector_gep2.ll23
-rw-r--r--test/Transforms/InstCombine/x86-f16c.ll61
-rw-r--r--test/Transforms/InstCombine/x86-pmovsx.ll136
-rw-r--r--test/Transforms/InstCombine/x86-pmovzx.ll136
-rw-r--r--test/Transforms/InstCombine/x86-pshufb.ll267
-rw-r--r--test/Transforms/InstCombine/x86-sse4a.ll336
-rw-r--r--test/Transforms/InstCombine/x86-vector-shifts.ll1318
-rw-r--r--test/Transforms/InstCombine/x86-xop.ll209
-rw-r--r--test/Transforms/InstCombine/xor.ll8
-rw-r--r--test/Transforms/InstSimplify/add-mask.ll65
-rw-r--r--test/Transforms/InstSimplify/apint-or.ll36
-rw-r--r--test/Transforms/InstSimplify/bswap.ll41
-rw-r--r--test/Transforms/InstSimplify/compare.ll8
-rw-r--r--test/Transforms/InstSimplify/implies.ll217
-rw-r--r--test/Transforms/InstSimplify/shift-128-kb.ll22
-rw-r--r--test/Transforms/InstSimplify/shr-nop.ll12
-rw-r--r--test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll8
-rw-r--r--test/Transforms/Internalize/comdat.ll52
-rw-r--r--test/Transforms/Internalize/local-visibility.ll8
-rw-r--r--test/Transforms/JumpThreading/basic.ll34
-rw-r--r--test/Transforms/JumpThreading/implied-cond.ll98
-rw-r--r--test/Transforms/JumpThreading/phi-known.ll66
-rw-r--r--test/Transforms/JumpThreading/select.ll30
-rw-r--r--test/Transforms/JumpThreading/update-edge-weight.ll43
-rw-r--r--test/Transforms/LCSSA/mixed-catch.ll95
-rw-r--r--test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll2
-rw-r--r--test/Transforms/LICM/argmemonly-call.ll69
-rw-r--r--test/Transforms/LICM/debug-value.ll12
-rw-r--r--test/Transforms/LICM/hoist-deref-load.ll44
-rw-r--r--test/Transforms/LICM/hoist-invariant-load.ll2
-rw-r--r--test/Transforms/LICM/pr23608.ll2
-rw-r--r--test/Transforms/LoopDistribute/basic-with-memchecks.ll2
-rw-r--r--test/Transforms/LoopDistribute/bounds-expansion-bug.ll106
-rw-r--r--test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll57
-rw-r--r--test/Transforms/LoopIdiom/basic.ll107
-rw-r--r--test/Transforms/LoopIdiom/debug-line.ll10
-rw-r--r--test/Transforms/LoopLoadElim/backward.ll32
-rw-r--r--test/Transforms/LoopLoadElim/def-store-before-load.ll35
-rw-r--r--test/Transforms/LoopLoadElim/forward.ll47
-rw-r--r--test/Transforms/LoopLoadElim/memcheck.ll52
-rw-r--r--test/Transforms/LoopLoadElim/multiple-stores-same-block.ll48
-rw-r--r--test/Transforms/LoopLoadElim/unknown-dep.ll54
-rw-r--r--test/Transforms/LoopReroll/negative.ll48
-rw-r--r--test/Transforms/LoopReroll/reroll_with_dbg.ll139
-rw-r--r--test/Transforms/LoopRotate/dbgvalue.ll14
-rw-r--r--test/Transforms/LoopSimplify/dbg-loc.ll4
-rw-r--r--test/Transforms/LoopSimplify/single-backedge.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll156
-rw-r--r--test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg3
-rw-r--r--test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll113
-rw-r--r--test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll50
-rw-r--r--test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg2
-rw-r--r--test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll45
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/funclet.ll216
-rw-r--r--test/Transforms/LoopStrengthReduce/pr12018.ll5
-rw-r--r--test/Transforms/LoopStrengthReduce/pr25541.ll48
-rw-r--r--test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/sext-ind-var.ll140
-rw-r--r--test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg3
-rw-r--r--test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll33
-rw-r--r--test/Transforms/LoopUnroll/X86/partial.ll9
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-bad-geps.ll34
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-crashers.ll102
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll57
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll97
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll207
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll23
-rw-r--r--test/Transforms/LoopUnroll/pr18861.ll91
-rw-r--r--test/Transforms/LoopUnroll/rebuild_lcssa.ll119
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop1.ll4
-rw-r--r--test/Transforms/LoopUnroll/unroll-pragmas.ll66
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll28
-rw-r--r--test/Transforms/LoopUnswitch/LIV-loop-condtion.ll28
-rw-r--r--test/Transforms/LoopUnswitch/basictest.ll39
-rw-r--r--test/Transforms/LoopUnswitch/cleanuppad.ll44
-rw-r--r--test/Transforms/LoopUnswitch/cold-loop.ll52
-rw-r--r--test/Transforms/LoopUnswitch/copy-metadata.ll23
-rw-r--r--test/Transforms/LoopUnswitch/infinite-loop.ll10
-rw-r--r--test/Transforms/LoopUnswitch/trivial-unswitch.ll47
-rw-r--r--test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll4
-rw-r--r--test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll54
-rw-r--r--test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll39
-rw-r--r--test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll243
-rw-r--r--test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll191
-rw-r--r--test/Transforms/LoopVectorize/ARM/interleaved_cost.ll39
-rw-r--r--test/Transforms/LoopVectorize/ARM/vector_cast.ll37
-rw-r--r--test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll40
-rw-r--r--test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll30
-rw-r--r--test/Transforms/LoopVectorize/X86/masked_load_store.ll142
-rw-r--r--test/Transforms/LoopVectorize/X86/metadata-enable.ll6
-rw-r--r--test/Transforms/LoopVectorize/X86/no_fpmath.ll104
-rw-r--r--test/Transforms/LoopVectorize/X86/powof2div.ll8
-rw-r--r--test/Transforms/LoopVectorize/X86/reduction-crash.ll2
-rw-r--r--test/Transforms/LoopVectorize/X86/reg-usage.ll71
-rw-r--r--test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll46
-rw-r--r--test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll8
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll16
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll113
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks.ll8
-rw-r--r--test/Transforms/LoopVectorize/conditional-assignment.ll6
-rw-r--r--test/Transforms/LoopVectorize/control-flow.ll8
-rw-r--r--test/Transforms/LoopVectorize/dbg.value.ll8
-rw-r--r--test/Transforms/LoopVectorize/debugloc.ll18
-rw-r--r--test/Transforms/LoopVectorize/gep_with_bitcast.ll40
-rw-r--r--test/Transforms/LoopVectorize/if-pred-stores.ll43
-rw-r--r--test/Transforms/LoopVectorize/induction.ll13
-rw-r--r--test/Transforms/LoopVectorize/miniters.ll45
-rw-r--r--test/Transforms/LoopVectorize/minmax_reduction.ll104
-rw-r--r--test/Transforms/LoopVectorize/no_array_bounds.ll6
-rw-r--r--test/Transforms/LoopVectorize/no_outside_user.ll2
-rw-r--r--test/Transforms/LoopVectorize/no_switch.ll18
-rw-r--r--test/Transforms/LoopVectorize/nontemporal.ll47
-rw-r--r--test/Transforms/LoopVectorize/optsize.ll43
-rw-r--r--test/Transforms/LoopVectorize/ptr-induction.ll34
-rw-r--r--test/Transforms/LoopVectorize/reduction.ll2
-rw-r--r--test/Transforms/LoopVectorize/reverse_induction.ll9
-rw-r--r--test/Transforms/LoopVectorize/runtime-check.ll6
-rw-r--r--test/Transforms/LoopVectorize/runtime-limit.ll21
-rw-r--r--test/Transforms/LowerBitSets/function-ext.ll22
-rw-r--r--test/Transforms/LowerBitSets/function.ll35
-rw-r--r--test/Transforms/LowerBitSets/nonstring.ll34
-rw-r--r--test/Transforms/LowerBitSets/pr25902.ll21
-rw-r--r--test/Transforms/LowerBitSets/simple.ll34
-rw-r--r--test/Transforms/LowerExpectIntrinsic/basic.ll2
-rw-r--r--test/Transforms/LowerSwitch/delete-default-block-crash.ll27
-rw-r--r--test/Transforms/LowerSwitch/feature.ll60
-rw-r--r--test/Transforms/Mem2Reg/ConvertDebugInfo.ll14
-rw-r--r--test/Transforms/Mem2Reg/ConvertDebugInfo2.ll28
-rw-r--r--test/Transforms/Mem2Reg/optnone.ll21
-rw-r--r--test/Transforms/Mem2Reg/pr24179.ll44
-rw-r--r--test/Transforms/MemCpyOpt/memcpy.ll5
-rw-r--r--test/Transforms/MemCpyOpt/nontemporal.ll49
-rw-r--r--test/Transforms/MergeFunc/apply_function_attributes.ll47
-rw-r--r--test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll18
-rw-r--r--test/Transforms/MergeFunc/constant-entire-value.ll42
-rw-r--r--test/Transforms/MergeFunc/crash2.ll54
-rw-r--r--test/Transforms/MergeFunc/gep-base-type.ll46
-rw-r--r--test/Transforms/MergeFunc/inttoptr-address-space.ll2
-rw-r--r--test/Transforms/MergeFunc/inttoptr.ll2
-rw-r--r--test/Transforms/MergeFunc/merge-block-address-other-function.ll49
-rw-r--r--test/Transforms/MergeFunc/merge-block-address.ll91
-rw-r--r--test/Transforms/MergeFunc/merge-const-ptr-and-int.ll20
-rw-r--r--test/Transforms/MergeFunc/merge-different-vector-types.ll18
-rw-r--r--test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll96
-rw-r--r--test/Transforms/MergeFunc/no-merge-block-address-other-function.ll61
-rw-r--r--test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll24
-rw-r--r--test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll23
-rw-r--r--test/Transforms/MergeFunc/ranges-multiple.ll44
-rw-r--r--test/Transforms/MergeFunc/ranges.ll8
-rw-r--r--test/Transforms/MergeFunc/self-referential-global.ll40
-rw-r--r--test/Transforms/MergeFunc/undef-different-types.ll21
-rw-r--r--test/Transforms/MetaRenamer/metarenamer.ll2
-rw-r--r--test/Transforms/NaryReassociate/NVPTX/nary-gep.ll17
-rw-r--r--test/Transforms/NaryReassociate/nary-add.ll6
-rw-r--r--test/Transforms/NaryReassociate/nary-mul.ll19
-rw-r--r--test/Transforms/NaryReassociate/pr24301.ll14
-rw-r--r--test/Transforms/ObjCARC/basic.ll6
-rw-r--r--test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll18
-rw-r--r--test/Transforms/ObjCARC/nested.ll4
-rw-r--r--test/Transforms/ObjCARC/provenance.ll2
-rw-r--r--test/Transforms/PGOProfile/Inputs/branch1.proftext6
-rw-r--r--test/Transforms/PGOProfile/Inputs/branch2.proftext6
-rw-r--r--test/Transforms/PGOProfile/Inputs/criticaledge.proftext17
-rw-r--r--test/Transforms/PGOProfile/Inputs/diag.proftext5
-rw-r--r--test/Transforms/PGOProfile/Inputs/landingpad.proftext14
-rw-r--r--test/Transforms/PGOProfile/Inputs/loop1.proftext6
-rw-r--r--test/Transforms/PGOProfile/Inputs/loop2.proftext7
-rw-r--r--test/Transforms/PGOProfile/Inputs/switch.proftext8
-rw-r--r--test/Transforms/PGOProfile/branch1.ll30
-rw-r--r--test/Transforms/PGOProfile/branch2.ll37
-rw-r--r--test/Transforms/PGOProfile/criticaledge.ll108
-rw-r--r--test/Transforms/PGOProfile/diag_mismatch.ll12
-rw-r--r--test/Transforms/PGOProfile/diag_no_funcprofdata.ll12
-rw-r--r--test/Transforms/PGOProfile/diag_no_profile.ll9
-rw-r--r--test/Transforms/PGOProfile/landingpad.ll124
-rw-r--r--test/Transforms/PGOProfile/loop1.ll42
-rw-r--r--test/Transforms/PGOProfile/loop2.ll70
-rw-r--r--test/Transforms/PGOProfile/single_bb.ll12
-rw-r--r--test/Transforms/PGOProfile/switch.ll47
-rw-r--r--test/Transforms/PlaceSafepoints/basic.ll2
-rw-r--r--test/Transforms/PlaceSafepoints/call_gc_result.ll4
-rw-r--r--test/Transforms/PlaceSafepoints/finite-loops.ll65
-rw-r--r--test/Transforms/PlaceSafepoints/patchable-statepoints.ll4
-rw-r--r--test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll4
-rw-r--r--test/Transforms/PlaceSafepoints/statepoint-format.ll4
-rw-r--r--test/Transforms/PruneEH/operand-bundles.ll26
-rw-r--r--test/Transforms/Reassociate/fast-ReassociateVector.ll10
-rw-r--r--test/Transforms/Reassociate/fast-basictest.ll2
-rw-r--r--test/Transforms/Reassociate/fast-fp-commute.ll4
-rw-r--r--test/Transforms/Reassociate/fast-multistep.ll6
-rw-r--r--test/Transforms/Reassociate/fp-expr.ll33
-rw-r--r--test/Transforms/Reassociate/multistep.ll6
-rw-r--r--test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll31
-rw-r--r--test/Transforms/Reassociate/secondary.ll2
-rw-r--r--test/Transforms/Reassociate/vaarg_movable.ll28
-rw-r--r--test/Transforms/Reassociate/xor_reassoc.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll11
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll20
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll19
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll21
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll10
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers.ll73
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-vector.ll167
-rw-r--r--test/Transforms/RewriteStatepointsForGC/basics.ll16
-rw-r--r--test/Transforms/RewriteStatepointsForGC/codegen-cond.ll74
-rw-r--r--test/Transforms/RewriteStatepointsForGC/constants.ll43
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll25
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll35
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll24
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll19
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll19
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll44
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll28
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll37
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll45
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll37
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll20
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll151
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll167
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll65
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll88
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll81
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll51
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll104
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll22
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll149
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll165
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll44
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll62
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll32
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll279
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll150
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll32
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deref-pointers.ll53
-rw-r--r--test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll6
-rw-r--r--test/Transforms/RewriteStatepointsForGC/live-vector.ll25
-rw-r--r--test/Transforms/RewriteStatepointsForGC/liveness-basics.ll22
-rw-r--r--test/Transforms/RewriteStatepointsForGC/preprocess.ll8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/relocation.ll40
-rw-r--r--test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll66
-rw-r--r--test/Transforms/SCCP/global-alias-constprop.ll11
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/commute.ll2
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/horizontal.ll270
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/nontemporal.ll76
-rw-r--r--test/Transforms/SLPVectorizer/X86/bad_types.ll26
-rw-r--r--test/Transforms/SLPVectorizer/X86/commutativity.ll78
-rw-r--r--test/Transforms/SLPVectorizer/X86/debug_info.ll16
-rw-r--r--test/Transforms/SLPVectorizer/X86/horizontal.ll2
-rw-r--r--test/Transforms/SLPVectorizer/X86/pr23510.ll38
-rw-r--r--test/Transforms/SLPVectorizer/X86/schedule_budget.ll93
-rw-r--r--test/Transforms/SROA/basictest.ll25
-rw-r--r--test/Transforms/SROA/big-endian.ll1
-rw-r--r--test/Transforms/SROA/fca.ll1
-rw-r--r--test/Transforms/SafeStack/AArch64/abi.ll20
-rw-r--r--test/Transforms/SafeStack/AArch64/lit.local.cfg3
-rw-r--r--test/Transforms/SafeStack/ARM/abi.ll18
-rw-r--r--test/Transforms/SafeStack/ARM/lit.local.cfg3
-rw-r--r--test/Transforms/SafeStack/ARM/setjmp.ll34
-rw-r--r--test/Transforms/SafeStack/X86/abi.ll30
-rw-r--r--test/Transforms/SafeStack/X86/lit.local.cfg3
-rw-r--r--test/Transforms/SafeStack/array.ll53
-rw-r--r--test/Transforms/SafeStack/byval.ll51
-rw-r--r--test/Transforms/SafeStack/call.ll160
-rw-r--r--test/Transforms/SafeStack/cast.ll28
-rw-r--r--test/Transforms/SafeStack/debug-loc.ll83
-rw-r--r--test/Transforms/SafeStack/ret.ll17
-rw-r--r--test/Transforms/SafeStack/setjmp2.ll2
-rw-r--r--test/Transforms/SafeStack/store.ll63
-rw-r--r--test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof2
-rw-r--r--test/Transforms/SampleProfile/Inputs/bad_fn_header.prof4
-rw-r--r--test/Transforms/SampleProfile/Inputs/bad_mangle.prof4
-rw-r--r--test/Transforms/SampleProfile/Inputs/bad_sample_line.prof4
-rw-r--r--test/Transforms/SampleProfile/Inputs/bad_samples.prof2
-rw-r--r--test/Transforms/SampleProfile/Inputs/branch.prof16
-rw-r--r--test/Transforms/SampleProfile/Inputs/calls.prof16
-rw-r--r--test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof10
-rw-r--r--test/Transforms/SampleProfile/Inputs/coverage-warning.prof5
-rw-r--r--test/Transforms/SampleProfile/Inputs/discriminator.prof14
-rw-r--r--test/Transforms/SampleProfile/Inputs/entry_counts.prof4
-rw-r--r--test/Transforms/SampleProfile/Inputs/fnptr.binprofbin112 -> 105 bytes
-rw-r--r--test/Transforms/SampleProfile/Inputs/fnptr.prof18
-rw-r--r--test/Transforms/SampleProfile/Inputs/gcc-simple.afdobin0 -> 1972 bytes
-rw-r--r--test/Transforms/SampleProfile/Inputs/inline-coverage.prof7
-rw-r--r--test/Transforms/SampleProfile/Inputs/inline-hint.prof3
-rw-r--r--test/Transforms/SampleProfile/Inputs/inline.prof7
-rw-r--r--test/Transforms/SampleProfile/Inputs/nolocinfo.prof3
-rw-r--r--test/Transforms/SampleProfile/Inputs/offset.prof4
-rw-r--r--test/Transforms/SampleProfile/Inputs/propagate.prof32
-rw-r--r--test/Transforms/SampleProfile/Inputs/remarks.prof7
-rw-r--r--test/Transforms/SampleProfile/Inputs/syntax.prof4
-rw-r--r--test/Transforms/SampleProfile/branch.ll292
-rw-r--r--test/Transforms/SampleProfile/calls.ll18
-rw-r--r--test/Transforms/SampleProfile/cov-zero-samples.ll142
-rw-r--r--test/Transforms/SampleProfile/coverage-warning.ll46
-rw-r--r--test/Transforms/SampleProfile/discriminator.ll14
-rw-r--r--test/Transforms/SampleProfile/entry_counts.ll8
-rw-r--r--test/Transforms/SampleProfile/fnptr.ll24
-rw-r--r--test/Transforms/SampleProfile/gcc-simple.ll218
-rw-r--r--test/Transforms/SampleProfile/inline-coverage.ll135
-rw-r--r--test/Transforms/SampleProfile/inline-hint.ll38
-rw-r--r--test/Transforms/SampleProfile/inline.ll108
-rw-r--r--test/Transforms/SampleProfile/nolocinfo.ll38
-rw-r--r--test/Transforms/SampleProfile/offset.ll82
-rw-r--r--test/Transforms/SampleProfile/propagate.ll26
-rw-r--r--test/Transforms/SampleProfile/remarks.ll185
-rw-r--r--test/Transforms/SampleProfile/syntax.ll2
-rw-r--r--test/Transforms/ScalarRepl/debuginfo-preserved.ll12
-rw-r--r--test/Transforms/Scalarizer/dbginfo.ll12
-rw-r--r--test/Transforms/Scalarizer/store-bug.ll25
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll14
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll62
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll16
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll2
-rw-r--r--test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll43
-rw-r--r--test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll43
-rw-r--r--test/Transforms/SimplifyCFG/ARM/lit.local.cfg5
-rw-r--r--test/Transforms/SimplifyCFG/Mips/cttz-ctlz.ll43
-rw-r--r--test/Transforms/SimplifyCFG/Mips/lit.local.cfg5
-rw-r--r--test/Transforms/SimplifyCFG/PR25267.ll24
-rw-r--r--test/Transforms/SimplifyCFG/SpeculativeExec.ll26
-rw-r--r--test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll24
-rw-r--r--test/Transforms/SimplifyCFG/basictest.ll14
-rw-r--r--test/Transforms/SimplifyCFG/branch-fold-dbg.ll8
-rw-r--r--test/Transforms/SimplifyCFG/empty-cleanuppad.ll415
-rw-r--r--test/Transforms/SimplifyCFG/hoist-dbgvalue.ll10
-rw-r--r--test/Transforms/SimplifyCFG/implied-cond.ll81
-rw-r--r--test/Transforms/SimplifyCFG/invoke_unwind.ll13
-rw-r--r--test/Transforms/SimplifyCFG/merge-cond-stores-2.ll215
-rw-r--r--test/Transforms/SimplifyCFG/merge-cond-stores.ll241
-rw-r--r--test/Transforms/SimplifyCFG/no_speculative_loads_with_asan.ll40
-rw-r--r--test/Transforms/SimplifyCFG/preserve-load-metadata-2.ll32
-rw-r--r--test/Transforms/SimplifyCFG/preserve-load-metadata-3.ll32
-rw-r--r--test/Transforms/SimplifyCFG/preserve-load-metadata.ll32
-rw-r--r--test/Transforms/SimplifyCFG/preserve-make-implicit-on-switch-to-br.ll30
-rw-r--r--test/Transforms/SimplifyCFG/speculate-math.ll45
-rw-r--r--test/Transforms/SimplifyCFG/statepoint-invoke-unwind.ll6
-rw-r--r--test/Transforms/SimplifyCFG/switch-dead-default.ll179
-rw-r--r--test/Transforms/SimplifyCFG/trap-debugloc.ll6
-rw-r--r--test/Transforms/SimplifyCFG/wineh-unreachable.ll83
-rw-r--r--test/Transforms/Sink/catchswitch.ll37
-rw-r--r--test/Transforms/Sink/landingpad.ll33
-rw-r--r--test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll4
-rw-r--r--test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll71
-rw-r--r--test/Transforms/StripDeadPrototypes/basic.ll12
-rw-r--r--test/Transforms/StripSymbols/2010-06-30-StripDebug.ll8
-rw-r--r--test/Transforms/StripSymbols/2010-08-25-crash.ll6
-rw-r--r--test/Transforms/StripSymbols/strip-dead-debug-info.ll16
-rw-r--r--test/Transforms/StructurizeCFG/nested-loop-order.ll2
-rw-r--r--test/Transforms/TailCallElim/basic.ll12
-rw-r--r--test/Transforms/TailCallElim/notail.ll24
-rw-r--r--test/Transforms/Util/lowerswitch.ll6
-rw-r--r--test/Transforms/Util/simplify-dbg-declare-load.ll52
601 files changed, 25625 insertions, 2018 deletions
diff --git a/test/Transforms/ADCE/basictest.ll b/test/Transforms/ADCE/basictest.ll
index 378d70288f3f..aaacc1842253 100644
--- a/test/Transforms/ADCE/basictest.ll
+++ b/test/Transforms/ADCE/basictest.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -adce -simplifycfg | llvm-dis
+; RUN: opt < %s -passes=adce | llvm-dis
define i32 @Test(i32 %A, i32 %B) {
BB1:
@@ -15,5 +16,3 @@ BB4: ; preds = %BB1
%X = phi i32 [ %A, %BB1 ] ; <i32> [#uses=1]
br label %BB3
}
-
-
diff --git a/test/Transforms/AddDiscriminators/basic.ll b/test/Transforms/AddDiscriminators/basic.ll
index cabf707fe5d6..0588562c7377 100644
--- a/test/Transforms/AddDiscriminators/basic.ll
+++ b/test/Transforms/AddDiscriminators/basic.ll
@@ -11,7 +11,7 @@
; if (i < 10) x = i;
; }
-define void @foo(i32 %i) #0 {
+define void @foo(i32 %i) #0 !dbg !4 {
entry:
%i.addr = alloca i32, align 4
%x = alloca i32, align 4
@@ -37,15 +37,19 @@ if.end: ; preds = %if.then, %entry
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-!llvm.dbg.cu = !{!0}
+; We should be able to add discriminators even in the absence of llvm.dbg.cu.
+; When using sample profiles, the front end will generate line tables but it
+; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
+; to the final binary.
+; !llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "basic.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "basic.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
@@ -55,7 +59,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!11 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
!12 = !DILocation(line: 4, scope: !4)
-; CHECK: ![[FOO:[0-9]+]] = !DISubprogram(name: "foo"
+; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
; CHECK: ![[BLOCK:[0-9]+]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 3)
; CHECK: ![[THEN]] = !DILocation(line: 3, scope: ![[BLOCKFILE:[0-9]+]])
; CHECK: ![[BLOCKFILE]] = !DILexicalBlockFile(scope: ![[BLOCK]],{{.*}} discriminator: 1)
diff --git a/test/Transforms/AddDiscriminators/call.ll b/test/Transforms/AddDiscriminators/call.ll
new file mode 100644
index 000000000000..b123b25f2af2
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/call.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -add-discriminators -S | FileCheck %s
+
+; Discriminator support for multiple calls made on the same line:
+; #1 void bar();
+; #2
+; #3 void foo() {
+; #4 bar();bar()/*discriminator 1*/;bar()/*discriminator 2*/;
+; #5 }
+
+; Function Attrs: uwtable
+define void @_Z3foov() #0 {
+ call void @_Z3barv(), !dbg !10
+; CHECK: call void @_Z3barv(), !dbg ![[CALL0:[0-9]+]]
+ call void @_Z3barv(), !dbg !11
+; CHECK: call void @_Z3barv(), !dbg ![[CALL1:[0-9]+]]
+ call void @_Z3barv(), !dbg !12
+; CHECK: call void @_Z3barv(), !dbg ![[CALL2:[0-9]+]]
+ ret void, !dbg !13
+}
+
+declare void @_Z3barv() #1
+
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; We should be able to add discriminators even in the absence of llvm.dbg.cu.
+; When using sample profiles, the front end will generate line tables but it
+; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
+; to the final binary.
+; !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250915) (llvm/trunk 251830)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "c.cc", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 250915) (llvm/trunk 251830)"}
+!10 = !DILocation(line: 4, column: 3, scope: !4)
+!11 = !DILocation(line: 4, column: 9, scope: !4)
+!12 = !DILocation(line: 4, column: 15, scope: !4)
+!13 = !DILocation(line: 5, column: 1, scope: !4)
+
+; CHECK: ![[CALL1]] = !DILocation(line: 4, column: 9, scope: ![[CALL1BLOCK:[0-9]+]])
+; CHECK: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
+; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 15, scope: ![[CALL2BLOCK:[0-9]+]])
+; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
diff --git a/test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll b/test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll
new file mode 100644
index 000000000000..5e90d32a62eb
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -add-discriminators < %s | FileCheck %s
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+; This checks whether the add-discriminators pass produces valid metadata on
+; llvm.dbg.declare instructions
+;
+; CHECK-LABEL: @test_valid_metadata
+define void @test_valid_metadata() {
+ %a = alloca i8
+ call void @llvm.dbg.declare(metadata i8* %a, metadata !2, metadata !5), !dbg !6
+ %b = alloca i8
+ call void @llvm.dbg.declare(metadata i8* %b, metadata !9, metadata !5), !dbg !11
+ ret void
+}
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = !DILocalVariable(scope: !3)
+!3 = distinct !DISubprogram(scope: null, file: !4, isLocal: false, isDefinition: true, isOptimized: false)
+!4 = !DIFile(filename: "a.cpp", directory: "/tmp")
+!5 = !DIExpression()
+!6 = !DILocation(line: 0, scope: !3, inlinedAt: !7)
+!7 = distinct !DILocation(line: 0, scope: !8)
+!8 = distinct !DISubprogram(linkageName: "test_valid_metadata", scope: null, isLocal: false, isDefinition: true, isOptimized: false)
+!9 = !DILocalVariable(scope: !10)
+!10 = distinct !DISubprogram(scope: null, file: !4, isLocal: false, isDefinition: true, isOptimized: false)
+!11 = !DILocation(line: 0, scope: !10)
diff --git a/test/Transforms/AddDiscriminators/diamond.ll b/test/Transforms/AddDiscriminators/diamond.ll
new file mode 100644
index 000000000000..2ca638a83ec3
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/diamond.ll
@@ -0,0 +1,72 @@
+; RUN: opt < %s -add-discriminators -S | FileCheck %s
+
+; Discriminator support for a diamond-shaped CFG:
+; #1 void bar(int);
+; #2
+; #3 void foo(int i) {
+; #4 if (i > 10)
+; #5 bar(5); else bar(3);
+; #6 }
+
+; bar(5): discriminator 0
+; bar(3): discriminator 1
+
+; Function Attrs: uwtable
+define void @_Z3fooi(i32 %i) #0 !dbg !4 {
+ %1 = alloca i32, align 4
+ store i32 %i, i32* %1, align 4
+ call void @llvm.dbg.declare(metadata i32* %1, metadata !11, metadata !12), !dbg !13
+ %2 = load i32, i32* %1, align 4, !dbg !14
+ %3 = icmp sgt i32 %2, 10, !dbg !16
+ br i1 %3, label %4, label %5, !dbg !17
+
+; <label>:4 ; preds = %0
+ call void @_Z3bari(i32 5), !dbg !18
+ br label %6, !dbg !18
+
+; <label>:5 ; preds = %0
+ call void @_Z3bari(i32 3), !dbg !19
+; CHECK: call void @_Z3bari(i32 3), !dbg ![[ELSE:[0-9]+]]
+ br label %6
+
+; <label>:6 ; preds = %5, %4
+ ret void, !dbg !20
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @_Z3bari(i32) #2
+
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 253273)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "a.cc", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 253273)"}
+!11 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 3, type: !7)
+!12 = !DIExpression()
+!13 = !DILocation(line: 3, column: 14, scope: !4)
+!14 = !DILocation(line: 4, column: 7, scope: !15)
+!15 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 7)
+!16 = !DILocation(line: 4, column: 9, scope: !15)
+!17 = !DILocation(line: 4, column: 7, scope: !4)
+!18 = !DILocation(line: 5, column: 5, scope: !15)
+!19 = !DILocation(line: 5, column: 18, scope: !15)
+!20 = !DILocation(line: 6, column: 1, scope: !4)
+
+; CHECK: ![[ELSE]] = !DILocation(line: 5, column: 18, scope: ![[ELSEBLOCK:[0-9]+]])
+; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
diff --git a/test/Transforms/AddDiscriminators/first-only.ll b/test/Transforms/AddDiscriminators/first-only.ll
index 7f1ea2b15cf6..20d88b55e96e 100644
--- a/test/Transforms/AddDiscriminators/first-only.ll
+++ b/test/Transforms/AddDiscriminators/first-only.ll
@@ -13,7 +13,7 @@
; }
; }
-define void @foo(i32 %i) #0 {
+define void @foo(i32 %i) #0 !dbg !4 {
entry:
%i.addr = alloca i32, align 4
%x = alloca i32, align 4
@@ -46,15 +46,19 @@ if.end: ; preds = %if.then, %entry
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-!llvm.dbg.cu = !{!0}
+; We should be able to add discriminators even in the absence of llvm.dbg.cu.
+; When using sample profiles, the front end will generate line tables but it
+; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
+; to the final binary.
+; !llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "first-only.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "first-only.c", directory: ".")
!6 = !DISubroutineType(types: !{null})
!7 = !{i32 2, !"Dwarf Version", i32 4}
@@ -63,7 +67,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
!10 = !DILocation(line: 3, scope: !11)
!11 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
-; CHECK: ![[FOO:[0-9]+]] = !DISubprogram(name: "foo"
+; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
; CHECK: ![[BLOCK1:[0-9]+]] = distinct !DILexicalBlock(scope: ![[FOO]],{{.*}} line: 3)
!12 = !DILocation(line: 3, scope: !13)
diff --git a/test/Transforms/AddDiscriminators/multiple.ll b/test/Transforms/AddDiscriminators/multiple.ll
index 621a7117571c..9a05fcd86864 100644
--- a/test/Transforms/AddDiscriminators/multiple.ll
+++ b/test/Transforms/AddDiscriminators/multiple.ll
@@ -10,7 +10,7 @@
; The two stores inside the if-then-else line must have different discriminator
; values.
-define void @foo(i32 %i) #0 {
+define void @foo(i32 %i) #0 !dbg !4 {
entry:
%i.addr = alloca i32, align 4
%x = alloca i32, align 4
@@ -47,15 +47,19 @@ if.end: ; preds = %if.else, %if.then
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-!llvm.dbg.cu = !{!0}
+; We should be able to add discriminators even in the absence of llvm.dbg.cu.
+; When using sample profiles, the front end will generate line tables but it
+; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
+; to the final binary.
+; !llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "multiple.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "multiple.c", directory: ".")
!6 = !DISubroutineType(types: !{null, !13})
!13 = !DIBasicType(encoding: DW_ATE_signed, name: "int", size: 32, align: 32)
diff --git a/test/Transforms/AddDiscriminators/no-discriminators.ll b/test/Transforms/AddDiscriminators/no-discriminators.ll
index 895967e73b4d..bbba9dc62c4e 100644
--- a/test/Transforms/AddDiscriminators/no-discriminators.ll
+++ b/test/Transforms/AddDiscriminators/no-discriminators.ll
@@ -12,7 +12,7 @@
; altered. If they are, it means that the discriminators pass added a
; new lexical scope.
-define i32 @foo(i64 %i) #0 {
+define i32 @foo(i64 %i) #0 !dbg !4 {
entry:
%retval = alloca i32, align 4
%i.addr = alloca i64, align 8
@@ -44,16 +44,20 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
+; We should be able to add discriminators even in the absence of llvm.dbg.cu.
+; When using sample profiles, the front end will generate line tables but it
+; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
+; to the final binary.
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "no-discriminators", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i64)* @foo, variables: !2)
-; CHECK: ![[FOO:[0-9]+]] = !DISubprogram(name: "foo"
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
!5 = !DIFile(filename: "no-discriminators", directory: ".")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9}
@@ -63,7 +67,7 @@ attributes #1 = { nounwind readnone }
; CHECK: !{i32 2, !"Dwarf Version", i32 2}
!11 = !{i32 1, !"Debug Info Version", i32 3}
!12 = !{!"clang version 3.5.0 "}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 1, arg: 1, scope: !4, file: !5, type: !9)
+!13 = !DILocalVariable(name: "i", line: 1, arg: 1, scope: !4, file: !5, type: !9)
!14 = !DILocation(line: 1, scope: !4)
!15 = !DILocation(line: 2, scope: !16)
; CHECK: ![[ENTRY]] = !DILocation(line: 2, scope: ![[BLOCK:[0-9]+]])
diff --git a/test/Transforms/AddDiscriminators/oneline.ll b/test/Transforms/AddDiscriminators/oneline.ll
new file mode 100644
index 000000000000..ebee3935dd66
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/oneline.ll
@@ -0,0 +1,102 @@
+; RUN: opt < %s -add-discriminators -S | FileCheck %s
+
+; Discriminator support for code that is written in one line:
+; #1 int foo(int i) {
+; #2 if (i == 3 || i == 5) return 100; else return 99;
+; #3 }
+
+; i == 3: discriminator 0
+; i == 5: discriminator 1
+; return 100: discriminator 2
+; return 99: discriminator 3
+
+define i32 @_Z3fooi(i32 %i) #0 !dbg !4 {
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ store i32 %i, i32* %2, align 4, !tbaa !13
+ call void @llvm.dbg.declare(metadata i32* %2, metadata !9, metadata !17), !dbg !18
+ %3 = load i32, i32* %2, align 4, !dbg !19, !tbaa !13
+ %4 = icmp eq i32 %3, 3, !dbg !21
+ br i1 %4, label %8, label %5, !dbg !22
+
+; <label>:5 ; preds = %0
+ %6 = load i32, i32* %2, align 4, !dbg !23, !tbaa !13
+; CHECK: %6 = load i32, i32* %2, align 4, !dbg ![[THEN1:[0-9]+]],{{.*}}
+
+ %7 = icmp eq i32 %6, 5, !dbg !24
+; CHECK: %7 = icmp eq i32 %6, 5, !dbg ![[THEN2:[0-9]+]]
+
+ br i1 %7, label %8, label %9, !dbg !25
+; CHECK: br i1 %7, label %8, label %9, !dbg ![[THEN3:[0-9]+]]
+
+; <label>:8 ; preds = %5, %0
+ store i32 100, i32* %1, align 4, !dbg !26
+; CHECK: store i32 100, i32* %1, align 4, !dbg ![[ELSE:[0-9]+]]
+
+ br label %10, !dbg !26
+; CHECK: br label %10, !dbg ![[ELSE]]
+
+; <label>:9 ; preds = %5
+ store i32 99, i32* %1, align 4, !dbg !27
+; CHECK: store i32 99, i32* %1, align 4, !dbg ![[COMBINE:[0-9]+]]
+
+ br label %10, !dbg !27
+; CHECK: br label %10, !dbg ![[COMBINE]]
+
+; <label>:10 ; preds = %9, %8
+ %11 = load i32, i32* %1, align 4, !dbg !28
+ ret i32 %11, !dbg !28
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+; We should be able to add discriminators even in the absence of llvm.dbg.cu.
+; When using sample profiles, the front end will generate line tables but it
+; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
+; to the final binary.
+; !llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.ident = !{!12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250915)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "a.cc", directory: "/usr/local/google/home/dehao/discr")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{!9}
+!9 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 1, type: !7)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{!"clang version 3.8.0 (trunk 250915)"}
+!13 = !{!14, !14, i64 0}
+!14 = !{!"int", !15, i64 0}
+!15 = !{!"omnipotent char", !16, i64 0}
+!16 = !{!"Simple C/C++ TBAA"}
+!17 = !DIExpression()
+!18 = !DILocation(line: 1, column: 13, scope: !4)
+!19 = !DILocation(line: 2, column: 7, scope: !20)
+!20 = distinct !DILexicalBlock(scope: !4, file: !1, line: 2, column: 7)
+!21 = !DILocation(line: 2, column: 9, scope: !20)
+!22 = !DILocation(line: 2, column: 14, scope: !20)
+!23 = !DILocation(line: 2, column: 17, scope: !20)
+!24 = !DILocation(line: 2, column: 19, scope: !20)
+!25 = !DILocation(line: 2, column: 7, scope: !4)
+!26 = !DILocation(line: 2, column: 25, scope: !20)
+!27 = !DILocation(line: 2, column: 42, scope: !20)
+!28 = !DILocation(line: 3, column: 1, scope: !4)
+
+; CHECK: ![[THEN1]] = !DILocation(line: 2, column: 17, scope: ![[THENBLOCK:[0-9]+]])
+; CHECK: ![[THENBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
+; CHECK: ![[THEN2]] = !DILocation(line: 2, column: 19, scope: ![[THENBLOCK]])
+; CHECK: ![[THEN3]] = !DILocation(line: 2, column: 7, scope: ![[THENBLOCK]])
+; CHECK: ![[ELSE]] = !DILocation(line: 2, column: 25, scope: ![[ELSEBLOCK:[0-9]+]])
+; CHECK: ![[ELSEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 2)
+; CHECK: ![[COMBINE]] = !DILocation(line: 2, column: 42, scope: ![[COMBINEBLOCK:[0-9]+]])
+; CHECK: ![[COMBINEBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 3)
diff --git a/test/Transforms/ArgumentPromotion/dbg.ll b/test/Transforms/ArgumentPromotion/dbg.ll
index 17a34cb62239..dbdccacf42ba 100644
--- a/test/Transforms/ArgumentPromotion/dbg.ll
+++ b/test/Transforms/ArgumentPromotion/dbg.ll
@@ -1,10 +1,9 @@
; RUN: opt < %s -argpromotion -S | FileCheck %s
-; CHECK: call void @test(i32 %
-; CHECK: !DISubprogram(name: "test",{{.*}} function: void (i32)* @test
declare void @sink(i32)
-define internal void @test(i32** %X) {
+; CHECK: define internal void @test({{.*}} !dbg [[SP:![0-9]+]]
+define internal void @test(i32** %X) !dbg !2 {
%1 = load i32*, i32** %X, align 8
%2 = load i32, i32* %1, align 8
call void @sink(i32 %2)
@@ -12,16 +11,19 @@ define internal void @test(i32** %X) {
}
define void @caller(i32** %Y) {
+; CHECK: call void @test(i32 %
call void @test(i32** %Y)
ret void
}
+; CHECK: [[SP]] = distinct !DISubprogram(name: "test",
+
!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!3}
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !DILocation(line: 8, scope: !2)
-!2 = !DISubprogram(name: "test", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, scope: null, function: void (i32**)* @test)
-!3 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 2, file: !5, subprograms: !4)
+!2 = distinct !DISubprogram(name: "test", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, scope: null)
+!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 2, file: !5, subprograms: !4)
!4 = !{!2}
!5 = !DIFile(filename: "test.c", directory: "")
diff --git a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
index 282d42f75f05..4647e8fd6d9e 100644
--- a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
+++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
@@ -229,7 +229,7 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
@@ -241,6 +241,10 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE]]
@@ -263,7 +267,7 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
@@ -275,6 +279,10 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: dmb
; CHECK: br label %[[DONE]]
@@ -296,7 +304,7 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
@@ -307,6 +315,10 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE]]
@@ -335,7 +347,7 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -350,6 +362,10 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
; CHECK-NOT: dmb
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: dmb
; CHECK: br label %[[DONE]]
diff --git a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
index 42d7b781006d..7bb6ffed397d 100644
--- a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
+++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
@@ -91,7 +91,7 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
@@ -103,6 +103,10 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE]]
@@ -125,7 +129,7 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
@@ -137,6 +141,10 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE]]
@@ -158,7 +166,7 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
@@ -169,6 +177,10 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE]]
@@ -197,7 +209,7 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -212,6 +224,10 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: call void @llvm.arm.clrex()
+; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE]]
diff --git a/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
index 54653000f5d8..f9aa524fac98 100644
--- a/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
+++ b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
@@ -9,17 +9,21 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
; CHECK: [[START]]:
; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
-; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
+; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK: call void @llvm.arm.clrex()
+; CHECK: br label %[[FAILURE_BB]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END]]
@@ -41,7 +45,7 @@ define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
; CHECK: [[START]]:
; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
@@ -52,6 +56,10 @@ define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK: call void @llvm.arm.clrex()
+; CHECK: br label %[[FAILURE_BB]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: dmb
; CHECK: br label %[[END]]
@@ -73,7 +81,7 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
; CHECK: [[START]]:
; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
@@ -84,6 +92,10 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
; CHECK-NOT: dmb
; CHECK: br label %[[END:.*]]
+; CHECK: [[NO_STORE_BB]]:
+; CHECK: call void @llvm.arm.clrex()
+; CHECK: br label %[[FAILURE_BB]]
+
; CHECK: [[FAILURE_BB]]:
; CHECK-NOT: dmb
; CHECK: br label %[[END]]
diff --git a/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
new file mode 100644
index 000000000000..792fb1ec4f70
--- /dev/null
+++ b/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -0,0 +1,82 @@
+; RUN: opt -S %s -atomic-expand -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and
+; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this
+; functionality, please move this test to a target which still does.
+
+define float @float_load_expand(float* %ptr) {
+; CHECK-LABEL: @float_load_expand
+; CHECK: %1 = bitcast float* %ptr to i32*
+; CHECK: %2 = load atomic i32, i32* %1 unordered, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+ %res = load atomic float, float* %ptr unordered, align 4
+ ret float %res
+}
+
+define float @float_load_expand_seq_cst(float* %ptr) {
+; CHECK-LABEL: @float_load_expand_seq_cst
+; CHECK: %1 = bitcast float* %ptr to i32*
+; CHECK: %2 = load atomic i32, i32* %1 seq_cst, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+ %res = load atomic float, float* %ptr seq_cst, align 4
+ ret float %res
+}
+
+define float @float_load_expand_vol(float* %ptr) {
+; CHECK-LABEL: @float_load_expand_vol
+; CHECK: %1 = bitcast float* %ptr to i32*
+; CHECK: %2 = load atomic volatile i32, i32* %1 unordered, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+ %res = load atomic volatile float, float* %ptr unordered, align 4
+ ret float %res
+}
+
+define float @float_load_expand_addr1(float addrspace(1)* %ptr) {
+; CHECK-LABEL: @float_load_expand_addr1
+; CHECK: %1 = bitcast float addrspace(1)* %ptr to i32 addrspace(1)*
+; CHECK: %2 = load atomic i32, i32 addrspace(1)* %1 unordered, align 4
+; CHECK: %3 = bitcast i32 %2 to float
+; CHECK: ret float %3
+ %res = load atomic float, float addrspace(1)* %ptr unordered, align 4
+ ret float %res
+}
+
+define void @float_store_expand(float* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float* %ptr to i32*
+; CHECK: store atomic i32 %1, i32* %2 unordered, align 4
+ store atomic float %v, float* %ptr unordered, align 4
+ ret void
+}
+
+define void @float_store_expand_seq_cst(float* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand_seq_cst
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float* %ptr to i32*
+; CHECK: store atomic i32 %1, i32* %2 seq_cst, align 4
+ store atomic float %v, float* %ptr seq_cst, align 4
+ ret void
+}
+
+define void @float_store_expand_vol(float* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand_vol
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float* %ptr to i32*
+; CHECK: store atomic volatile i32 %1, i32* %2 unordered, align 4
+ store atomic volatile float %v, float* %ptr unordered, align 4
+ ret void
+}
+
+define void @float_store_expand_addr1(float addrspace(1)* %ptr, float %v) {
+; CHECK-LABEL: @float_store_expand_addr1
+; CHECK: %1 = bitcast float %v to i32
+; CHECK: %2 = bitcast float addrspace(1)* %ptr to i32 addrspace(1)*
+; CHECK: store atomic i32 %1, i32 addrspace(1)* %2 unordered, align 4
+ store atomic float %v, float addrspace(1)* %ptr unordered, align 4
+ ret void
+}
+
diff --git a/test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll b/test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
new file mode 100644
index 000000000000..029a0e7b3e92
--- /dev/null
+++ b/test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
@@ -0,0 +1,11 @@
+; RUN: opt -S %s -atomic-expand -mtriple=i686-linux-gnu | FileCheck %s
+
+; This file tests the function `llvm::expandAtomicRMWToCmpXchg`.
+; It isn't technically target specific, but is exposed through a pass that is.
+
+define i8 @test_initial_load(i8* %ptr, i8 %value) {
+ %res = atomicrmw nand i8* %ptr, i8 %value seq_cst
+ ret i8 %res
+}
+; CHECK-LABEL: @test_initial_load
+; CHECK-NEXT: %1 = load i8, i8* %ptr, align 1
diff --git a/test/Transforms/AtomicExpand/X86/lit.local.cfg b/test/Transforms/AtomicExpand/X86/lit.local.cfg
new file mode 100644
index 000000000000..afde89be896d
--- /dev/null
+++ b/test/Transforms/AtomicExpand/X86/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/BBVectorize/X86/wr-aliases.ll b/test/Transforms/BBVectorize/X86/wr-aliases.ll
index 56448c0e5471..a6ea27fc3ecb 100644
--- a/test/Transforms/BBVectorize/X86/wr-aliases.ll
+++ b/test/Transforms/BBVectorize/X86/wr-aliases.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -bb-vectorize -S < %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -disable-basicaa -bb-vectorize -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/BBVectorize/simple3.ll b/test/Transforms/BBVectorize/simple3.ll
index da7f94149414..6edf7f07ac1d 100644
--- a/test/Transforms/BBVectorize/simple3.ll
+++ b/test/Transforms/BBVectorize/simple3.ll
@@ -4,12 +4,12 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; Basic depth-3 chain
define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2, double %B3) {
; CHECK-LABEL: @test1(
-; CHECK: %X1.v.i1.1.1 = insertelement <3 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i1.2.2 = insertelement <3 x double> %X1.v.i1.1.1, double %B2, i32 1
-; CHECK: %X1.v.i1 = insertelement <3 x double> %X1.v.i1.2.2, double %B3, i32 2
-; CHECK: %X1.v.i0.1.3 = insertelement <3 x double> undef, double %A1, i32 0
-; CHECK: %X1.v.i0.2.4 = insertelement <3 x double> %X1.v.i0.1.3, double %A2, i32 1
-; CHECK: %X1.v.i0 = insertelement <3 x double> %X1.v.i0.2.4, double %A3, i32 2
+; CHECK: %X1.v.i1.11 = insertelement <3 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.22 = insertelement <3 x double> %X1.v.i1.11, double %B2, i32 1
+; CHECK: %X1.v.i1 = insertelement <3 x double> %X1.v.i1.22, double %B3, i32 2
+; CHECK: %X1.v.i0.13 = insertelement <3 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.24 = insertelement <3 x double> %X1.v.i0.13, double %A2, i32 1
+; CHECK: %X1.v.i0 = insertelement <3 x double> %X1.v.i0.24, double %A3, i32 2
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
%X3 = fsub double %A3, %B3
@@ -24,11 +24,11 @@ define double @test1(double %A1, double %A2, double %A3, double %B1, double %B2,
; CHECK: %Z1 = fadd <3 x double> %Y1, %X1.v.i1
%R1 = fmul double %Z1, %Z2
%R = fmul double %R1, %Z3
-; CHECK: %Z1.v.r2.10 = extractelement <3 x double> %Z1, i32 2
+; CHECK: %Z1.v.r210 = extractelement <3 x double> %Z1, i32 2
; CHECK: %Z1.v.r1 = extractelement <3 x double> %Z1, i32 0
; CHECK: %Z1.v.r2 = extractelement <3 x double> %Z1, i32 1
; CHECK: %R1 = fmul double %Z1.v.r1, %Z1.v.r2
-; CHECK: %R = fmul double %R1, %Z1.v.r2.10
+; CHECK: %R = fmul double %R1, %Z1.v.r210
ret double %R
; CHECK: ret double %R
}
diff --git a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
index d4b94fe62c71..43fcc6051210 100644
--- a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
+++ b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
@@ -1,13 +1,18 @@
-; RUN: opt < %s -O3 -o - | llc -no-integrated-as -o - | grep bork_directive | wc -l | grep 2
+; RUN: opt < %s -O3 | llc -no-integrated-as | FileCheck %s
+; REQUIRES: default_triple
;; We don't want branch folding to fold asm directives.
+; CHECK: bork_directive
+; CHECK: bork_directive
+; CHECK-NOT: bork_directive
+
define void @bork(i32 %param) {
entry:
%tmp = icmp eq i32 %param, 0
br i1 %tmp, label %cond_true, label %cond_false
-cond_true:
+cond_true:
call void asm sideeffect ".bork_directive /* ${0:c}:${1:c} */", "i,i,~{dirflag},~{fpsr},~{flags}"( i32 37, i32 927 )
ret void
diff --git a/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll b/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll
new file mode 100644
index 000000000000..c3c11a1c4949
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/AArch64/free-zext.ll
@@ -0,0 +1,82 @@
+; RUN: opt -S -codegenprepare -mtriple=aarch64-linux %s | FileCheck %s
+
+; Test for CodeGenPrepare::optimizeLoadExt(): simple case: two loads
+; feeding a phi that zext's each loaded value.
+define i32 @test_free_zext(i32* %ptr, i32* %ptr2, i32 %c) {
+; CHECK-LABEL: @test_free_zext(
+bb1:
+; CHECK-LABEL: bb1:
+; CHECK: %[[T1:.*]] = load
+; CHECK: %[[A1:.*]] = and i32 %[[T1]], 65535
+ %load1 = load i32, i32* %ptr, align 4
+ %cmp = icmp ne i32 %c, 0
+ br i1 %cmp, label %bb2, label %bb3
+bb2:
+; CHECK-LABEL: bb2:
+; CHECK: %[[T2:.*]] = load
+; CHECK: %[[A2:.*]] = and i32 %[[T2]], 65535
+ %load2 = load i32, i32* %ptr2, align 4
+ br label %bb3
+bb3:
+; CHECK-LABEL: bb3:
+; CHECK: phi i32 [ %[[A1]], %bb1 ], [ %[[A2]], %bb2 ]
+ %phi = phi i32 [ %load1, %bb1 ], [ %load2, %bb2 ]
+ %and = and i32 %phi, 65535
+ ret i32 %and
+}
+
+; Test for CodeGenPrepare::optimizeLoadExt(): exercise all opcode
+; cases of active bit calculation.
+define i32 @test_free_zext2(i32* %ptr, i16* %dst16, i32* %dst32, i32 %c) {
+; CHECK-LABEL: @test_free_zext2(
+bb1:
+; CHECK-LABEL: bb1:
+; CHECK: %[[T1:.*]] = load
+; CHECK: %[[A1:.*]] = and i32 %[[T1]], 65535
+ %load1 = load i32, i32* %ptr, align 4
+ %cmp = icmp ne i32 %c, 0
+ br i1 %cmp, label %bb2, label %bb4
+bb2:
+; CHECK-LABEL: bb2:
+ %trunc = trunc i32 %load1 to i16
+ store i16 %trunc, i16* %dst16, align 2
+ br i1 %cmp, label %bb3, label %bb4
+bb3:
+; CHECK-LABEL: bb3:
+ %shl = shl i32 %load1, 16
+ store i32 %shl, i32* %dst32, align 4
+ br label %bb4
+bb4:
+; CHECK-LABEL: bb4:
+; CHECK-NOT: and
+; CHECK: ret i32 %[[A1]]
+ %and = and i32 %load1, 65535
+ ret i32 %and
+}
+
+; Test for CodeGenPrepare::optimizeLoadExt(): check case of zext-able
+; load feeding a phi in the same block.
+define void @test_free_zext3(i32* %ptr, i32* %ptr2, i32* %dst, i64* %c) {
+; CHECK-LABEL: @test_free_zext3(
+bb1:
+; CHECK-LABEL: bb1:
+; CHECK: %[[T1:.*]] = load
+; CHECK: %[[A1:.*]] = and i32 %[[T1]], 65535
+ %load1 = load i32, i32* %ptr, align 4
+ br label %loop
+loop:
+; CHECK-LABEL: loop:
+; CHECK: phi i32 [ %[[A1]], %bb1 ], [ %[[A2]], %loop ]
+ %phi = phi i32 [ %load1, %bb1 ], [ %load2, %loop ]
+ %and = and i32 %phi, 65535
+ store i32 %and, i32* %dst, align 4
+ %idx = load volatile i64, i64* %c, align 4
+ %addr = getelementptr inbounds i32, i32* %ptr2, i64 %idx
+; CHECK: %[[T2:.*]] = load i32
+; CHECK: %[[A2:.*]] = and i32 %[[T2]], 65535
+ %load2 = load i32, i32* %addr, align 4
+ %cmp = icmp ne i64 %idx, 0
+ br i1 %cmp, label %loop, label %end
+end:
+ ret void
+}
diff --git a/test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll b/test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll
new file mode 100644
index 000000000000..172541a46080
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll
@@ -0,0 +1,95 @@
+;; AArch64 is arbitrarily chosen as a 32/64-bit RISC representative to show the transform in all tests.
+
+; RUN: opt < %s -codegenprepare -S -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=ARM64
+
+; AArch64 widens to 32-bit.
+
+define i32 @widen_switch_i16(i32 %a) {
+entry:
+ %trunc = trunc i32 %a to i16
+ switch i16 %trunc, label %sw.default [
+ i16 1, label %sw.bb0
+ i16 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; ARM64-LABEL: @widen_switch_i16(
+; ARM64: %0 = zext i16 %trunc to i32
+; ARM64-NEXT: switch i32 %0, label %sw.default [
+; ARM64-NEXT: i32 1, label %return
+; ARM64-NEXT: i32 65535, label %sw.bb1
+}
+
+; Widen to 32-bit from a smaller, non-native type.
+
+define i32 @widen_switch_i17(i32 %a) {
+entry:
+ %trunc = trunc i32 %a to i17
+ switch i17 %trunc, label %sw.default [
+ i17 10, label %sw.bb0
+ i17 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; ARM64-LABEL: @widen_switch_i17(
+; ARM64: %0 = zext i17 %trunc to i32
+; ARM64-NEXT: switch i32 %0, label %sw.default [
+; ARM64-NEXT: i32 10, label %return
+; ARM64-NEXT: i32 131071, label %sw.bb1
+}
+
+; If the switch condition is a sign-extended function argument, then the
+; condition and cases should be sign-extended rather than zero-extended
+; because the sign-extension can be optimized away.
+
+define i32 @widen_switch_i16_sext(i2 signext %a) {
+entry:
+ switch i2 %a, label %sw.default [
+ i2 1, label %sw.bb0
+ i2 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; ARM64-LABEL: @widen_switch_i16_sext(
+; ARM64: %0 = sext i2 %a to i32
+; ARM64-NEXT: switch i32 %0, label %sw.default [
+; ARM64-NEXT: i32 1, label %return
+; ARM64-NEXT: i32 -1, label %sw.bb1
+}
+
diff --git a/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll b/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
new file mode 100644
index 000000000000..8c5e01e3634f
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
@@ -0,0 +1,118 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+; The following target lines are needed for the test to exercise what it should.
+; Without these lines, CodeGenPrepare does not try to sink the bitcasts.
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @f()
+
+declare void @g(i8*)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+; CodeGenPrepare will want to sink these bitcasts, but it selects the catchpad
+; blocks as the place to which the bitcast should be sunk. Since catchpads
+; do not allow non-phi instructions before the terminator, this isn't possible.
+
+; CHECK-LABEL: @test(
+define void @test(i32* %addr) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %x = getelementptr i32, i32* %addr, i32 1
+ %p1 = bitcast i32* %x to i8*
+ invoke void @f()
+ to label %invoke.cont unwind label %catch1
+
+; CHECK: invoke.cont:
+; CHECK-NEXT: %y = getelementptr i32, i32* %addr, i32 2
+invoke.cont:
+ %y = getelementptr i32, i32* %addr, i32 2
+ %p2 = bitcast i32* %y to i8*
+ invoke void @f()
+ to label %done unwind label %catch2
+
+done:
+ ret void
+
+catch1:
+ %cs1 = catchswitch within none [label %handler1] unwind to caller
+
+handler1:
+ %cp1 = catchpad within %cs1 []
+ br label %catch.shared
+; CHECK: handler1:
+; CHECK-NEXT: catchpad within %cs1
+; CHECK: %[[p1:[0-9]+]] = bitcast i32* %x to i8*
+
+catch2:
+ %cs2 = catchswitch within none [label %handler2] unwind to caller
+
+handler2:
+ %cp2 = catchpad within %cs2 []
+ br label %catch.shared
+; CHECK: handler2:
+; CHECK: catchpad within %cs2
+; CHECK: %[[p2:[0-9]+]] = bitcast i32* %y to i8*
+
+; CHECK: catch.shared:
+; CHECK-NEXT: %p = phi i8* [ %[[p1]], %handler1 ], [ %[[p2]], %handler2 ]
+catch.shared:
+ %p = phi i8* [ %p1, %handler1 ], [ %p2, %handler2 ]
+ call void @g(i8* %p)
+ unreachable
+}
+
+; CodeGenPrepare will want to hoist these llvm.dbg.value calls to the phi, but
+; there is no insertion point in a catchpad block.
+
+; CHECK-LABEL: @test_dbg_value(
+define void @test_dbg_value() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %a = alloca i8
+ %b = alloca i8
+ invoke void @f() to label %next unwind label %catch.dispatch
+next:
+ invoke void @f() to label %ret unwind label %catch.dispatch
+ret:
+ ret void
+
+catch.dispatch:
+ %p = phi i8* [%a, %entry], [%b, %next]
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %cp1 = catchpad within %cs1 []
+ tail call void @llvm.dbg.value(metadata i8* %p, i64 0, metadata !11, metadata !13), !dbg !14
+ call void @g(i8* %p)
+ catchret from %cp1 to label %ret
+
+; CHECK: catch.dispatch:
+; CHECK-NEXT: phi i8
+; CHECK-NEXT: catchswitch
+; CHECK-NOT: llvm.dbg.value
+
+; CHECK: catch:
+; CHECK-NEXT: catchpad
+; CHECK-NEXT: call void @llvm.dbg.value
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: null, subprograms: !3)
+!1 = !DIFile(filename: "t.c", directory: "D:\5Csrc\5Cllvm\5Cbuild")
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: null)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"PIC Level", i32 2}
+!10 = !{!"clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)"}
+!11 = !DILocalVariable(name: "p", scope: !4, file: !1, line: 2, type: !12)
+!12 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!13 = !DIExpression()
+!14 = !DILocation(line: 2, column: 8, scope: !4)
+!15 = !DILocation(line: 3, column: 1, scope: !4)
diff --git a/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll b/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
new file mode 100644
index 000000000000..72d82e2a162e
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s --check-prefix=SLOW
+; RUN: opt -S -codegenprepare -mattr=+bmi < %s | FileCheck %s --check-prefix=FAST_TZ
+; RUN: opt -S -codegenprepare -mattr=+lzcnt < %s | FileCheck %s --check-prefix=FAST_LZ
+
+target triple = "x86_64-unknown-unknown"
+target datalayout = "e-n32:64"
+
+; If the intrinsic is cheap, nothing should change.
+; If the intrinsic is expensive, check if the input is zero to avoid the call.
+; This is undoing speculation that may have been created by SimplifyCFG + InstCombine.
+
+define i64 @cttz(i64 %A) {
+entry:
+ %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+ ret i64 %z
+
+; SLOW-LABEL: @cttz(
+; SLOW: entry:
+; SLOW: %cmpz = icmp eq i64 %A, 0
+; SLOW: br i1 %cmpz, label %cond.end, label %cond.false
+; SLOW: cond.false:
+; SLOW: %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
+; SLOW: br label %cond.end
+; SLOW: cond.end:
+; SLOW: %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
+; SLOW: ret i64 %ctz
+
+; FAST_TZ-LABEL: @cttz(
+; FAST_TZ: %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+; FAST_TZ: ret i64 %z
+}
+
+define i64 @ctlz(i64 %A) {
+entry:
+ %z = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
+ ret i64 %z
+
+; SLOW-LABEL: @ctlz(
+; SLOW: entry:
+; SLOW: %cmpz = icmp eq i64 %A, 0
+; SLOW: br i1 %cmpz, label %cond.end, label %cond.false
+; SLOW: cond.false:
+; SLOW: %z = call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+; SLOW: br label %cond.end
+; SLOW: cond.end:
+; SLOW: %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
+; SLOW: ret i64 %ctz
+
+; FAST_LZ-LABEL: @ctlz(
+; FAST_LZ: %z = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
+; FAST_LZ: ret i64 %z
+}
+
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+
diff --git a/test/Transforms/CodeGenPrepare/X86/select.ll b/test/Transforms/CodeGenPrepare/X86/select.ll
new file mode 100644
index 000000000000..a26938ad5ee4
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/select.ll
@@ -0,0 +1,141 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target triple = "x86_64-unknown-unknown"
+
+; Nothing to sink here, but this gets converted to a branch to
+; avoid stalling an out-of-order CPU on a predictable branch.
+
+define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y) {
+entry:
+ %load = load double, double* %b, align 8
+ %cmp = fcmp olt double %load, %a
+ %sel = select i1 %cmp, i32 %x, i32 %y
+ ret i32 %sel
+
+; CHECK-LABEL: @no_sink(
+; CHECK: %load = load double, double* %b, align 8
+; CHECK: %cmp = fcmp olt double %load, %a
+; CHECK: br i1 %cmp, label %select.end, label %select.false
+; CHECK: select.false:
+; CHECK: br label %select.end
+; CHECK: select.end:
+; CHECK: %sel = phi i32 [ %x, %entry ], [ %y, %select.false ]
+; CHECK: ret i32 %sel
+}
+
+
+; An 'fdiv' is expensive, so sink it rather than speculatively execute it.
+
+define float @fdiv_true_sink(float %a, float %b) {
+entry:
+ %div = fdiv float %a, %b
+ %cmp = fcmp ogt float %a, 1.0
+ %sel = select i1 %cmp, float %div, float 2.0
+ ret float %sel
+
+; CHECK-LABEL: @fdiv_true_sink(
+; CHECK: %cmp = fcmp ogt float %a, 1.0
+; CHECK: br i1 %cmp, label %select.true.sink, label %select.end
+; CHECK: select.true.sink:
+; CHECK: %div = fdiv float %a, %b
+; CHECK: br label %select.end
+; CHECK: select.end:
+; CHECK: %sel = phi float [ %div, %select.true.sink ], [ 2.000000e+00, %entry ]
+; CHECK: ret float %sel
+}
+
+define float @fdiv_false_sink(float %a, float %b) {
+entry:
+ %div = fdiv float %a, %b
+ %cmp = fcmp ogt float %a, 3.0
+ %sel = select i1 %cmp, float 4.0, float %div
+ ret float %sel
+
+; CHECK-LABEL: @fdiv_false_sink(
+; CHECK: %cmp = fcmp ogt float %a, 3.0
+; CHECK: br i1 %cmp, label %select.end, label %select.false.sink
+; CHECK: select.false.sink:
+; CHECK: %div = fdiv float %a, %b
+; CHECK: br label %select.end
+; CHECK: select.end:
+; CHECK: %sel = phi float [ 4.000000e+00, %entry ], [ %div, %select.false.sink ]
+; CHECK: ret float %sel
+}
+
+define float @fdiv_both_sink(float %a, float %b) {
+entry:
+ %div1 = fdiv float %a, %b
+ %div2 = fdiv float %b, %a
+ %cmp = fcmp ogt float %a, 5.0
+ %sel = select i1 %cmp, float %div1, float %div2
+ ret float %sel
+
+; CHECK-LABEL: @fdiv_both_sink(
+; CHECK: %cmp = fcmp ogt float %a, 5.0
+; CHECK: br i1 %cmp, label %select.true.sink, label %select.false.sink
+; CHECK: select.true.sink:
+; CHECK: %div1 = fdiv float %a, %b
+; CHECK: br label %select.end
+; CHECK: select.false.sink:
+; CHECK: %div2 = fdiv float %b, %a
+; CHECK: br label %select.end
+; CHECK: select.end:
+; CHECK: %sel = phi float [ %div1, %select.true.sink ], [ %div2, %select.false.sink ]
+; CHECK: ret float %sel
+}
+
+; An 'fadd' is not too expensive, so it's ok to speculate.
+
+define float @fadd_no_sink(float %a, float %b) {
+ %add = fadd float %a, %b
+ %cmp = fcmp ogt float 6.0, %a
+ %sel = select i1 %cmp, float %add, float 7.0
+ ret float %sel
+
+; CHECK-LABEL: @fadd_no_sink(
+; CHECK: %sel = select i1 %cmp, float %add, float 7.0
+}
+
+; Possible enhancement: sinkability is only calculated with the direct
+; operand of the select, so we don't try to sink this. The fdiv cost is not
+; taken into account.
+
+define float @fdiv_no_sink(float %a, float %b) {
+entry:
+ %div = fdiv float %a, %b
+ %add = fadd float %div, %b
+ %cmp = fcmp ogt float %a, 1.0
+ %sel = select i1 %cmp, float %add, float 8.0
+ ret float %sel
+
+; CHECK-LABEL: @fdiv_no_sink(
+; CHECK: %sel = select i1 %cmp, float %add, float 8.0
+}
+
+; Do not transform the CFG if the select operands may have side effects.
+
+declare i64* @bar(i32, i32, i32)
+declare i64* @baz(i32, i32, i32)
+
+define i64* @calls_no_sink(i32 %in) {
+ %call1 = call i64* @bar(i32 1, i32 2, i32 3)
+ %call2 = call i64* @baz(i32 1, i32 2, i32 3)
+ %tobool = icmp ne i32 %in, 0
+ %sel = select i1 %tobool, i64* %call1, i64* %call2
+ ret i64* %sel
+
+; CHECK-LABEL: @calls_no_sink(
+; CHECK: %sel = select i1 %tobool, i64* %call1, i64* %call2
+}
+
+define i32 @sdiv_no_sink(i32 %a, i32 %b) {
+ %div1 = sdiv i32 %a, %b
+ %div2 = sdiv i32 %b, %a
+ %cmp = icmp sgt i32 %a, 5
+ %sel = select i1 %cmp, i32 %div1, i32 %div2
+ ret i32 %sel
+
+; CHECK-LABEL: @sdiv_no_sink(
+; CHECK: %sel = select i1 %cmp, i32 %div1, i32 %div2
+}
+
diff --git a/test/Transforms/CodeGenPrepare/X86/widen_switch.ll b/test/Transforms/CodeGenPrepare/X86/widen_switch.ll
new file mode 100644
index 000000000000..53c9cc073558
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/widen_switch.ll
@@ -0,0 +1,95 @@
+;; x86 is chosen to show the transform when 8-bit and 16-bit registers are available.
+
+; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X86
+
+; No change for x86 because 16-bit registers are part of the architecture.
+
+define i32 @widen_switch_i16(i32 %a) {
+entry:
+ %trunc = trunc i32 %a to i16
+ switch i16 %trunc, label %sw.default [
+ i16 1, label %sw.bb0
+ i16 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; X86-LABEL: @widen_switch_i16(
+; X86: %trunc = trunc i32 %a to i16
+; X86-NEXT: switch i16 %trunc, label %sw.default [
+; X86-NEXT: i16 1, label %return
+; X86-NEXT: i16 -1, label %sw.bb1
+}
+
+; Widen to 32-bit from a smaller, non-native type.
+
+define i32 @widen_switch_i17(i32 %a) {
+entry:
+ %trunc = trunc i32 %a to i17
+ switch i17 %trunc, label %sw.default [
+ i17 10, label %sw.bb0
+ i17 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; X86-LABEL: @widen_switch_i17(
+; X86: %0 = zext i17 %trunc to i32
+; X86-NEXT: switch i32 %0, label %sw.default [
+; X86-NEXT: i32 10, label %return
+; X86-NEXT: i32 131071, label %sw.bb1
+}
+
+; If the switch condition is a sign-extended function argument, then the
+; condition and cases should be sign-extended rather than zero-extended
+; because the sign-extension can be optimized away.
+
+define i32 @widen_switch_i16_sext(i2 signext %a) {
+entry:
+ switch i2 %a, label %sw.default [
+ i2 1, label %sw.bb0
+ i2 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; X86-LABEL: @widen_switch_i16_sext(
+; X86: %0 = sext i2 %a to i8
+; X86-NEXT: switch i8 %0, label %sw.default [
+; X86-NEXT: i8 1, label %return
+; X86-NEXT: i8 -1, label %sw.bb1
+}
+
diff --git a/test/Transforms/CodeGenPrepare/invariant.group.ll b/test/Transforms/CodeGenPrepare/invariant.group.ll
new file mode 100644
index 000000000000..e8f1e42ddcbb
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/invariant.group.ll
@@ -0,0 +1,23 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+@tmp = global i8 0
+
+; CHECK-LABEL: define void @foo() {
+define void @foo() {
+enter:
+ ; CHECK-NOT: !invariant.group
+ ; CHECK-NOT: @llvm.invariant.group.barrier(
+ ; CHECK: %val = load i8, i8* @tmp
+ %val = load i8, i8* @tmp, !invariant.group !0
+ %ptr = call i8* @llvm.invariant.group.barrier(i8* @tmp)
+
+ ; CHECK: store i8 42, i8* @tmp
+ store i8 42, i8* %ptr, !invariant.group !0
+
+ ret void
+}
+; CHECK-LABEL: }
+
+declare i8* @llvm.invariant.group.barrier(i8*)
+
+!0 = !{!"something"}
\ No newline at end of file
diff --git a/test/Transforms/CodeGenPrepare/statepoint-relocate.ll b/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
index b6898b373113..b31dfe7f3fa6 100644
--- a/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
+++ b/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
@@ -10,9 +10,9 @@ define i32 @test_sor_basic(i32* %base) gc "statepoint-example" {
; CHECK: getelementptr i32, i32* %base-new, i32 15
entry:
%ptr = getelementptr i32, i32* %base, i32 15
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
- %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7)
- %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+ %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
@@ -25,10 +25,10 @@ define i32 @test_sor_two_derived(i32* %base) gc "statepoint-example" {
entry:
%ptr = getelementptr i32, i32* %base, i32 15
%ptr2 = getelementptr i32, i32* %base, i32 12
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
- %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7)
- %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
- %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 9)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
+ %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+ %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 9)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
@@ -38,9 +38,9 @@ define i32 @test_sor_ooo(i32* %base) gc "statepoint-example" {
; CHECK: getelementptr i32, i32* %base-new, i32 15
entry:
%ptr = getelementptr i32, i32* %base, i32 15
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
- %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
- %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+ %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
@@ -50,9 +50,9 @@ define i32 @test_sor_gep_smallint([3 x i32]* %base) gc "statepoint-example" {
; CHECK: getelementptr [3 x i32], [3 x i32]* %base-new, i32 0, i32 2
entry:
%ptr = getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 2
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
- %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 7, i32 7)
- %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
+ %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(token %tok, i32 7, i32 7)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
@@ -62,27 +62,66 @@ define i32 @test_sor_gep_largeint([3 x i32]* %base) gc "statepoint-example" {
; CHECK-NOT: getelementptr [3 x i32], [3 x i32]* %base-new, i32 0, i32 21
entry:
%ptr = getelementptr [3 x i32], [3 x i32]* %base, i32 0, i32 21
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
- %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 7, i32 7)
- %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
+ %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(token %tok, i32 7, i32 7)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
define i32 @test_sor_noop(i32* %base) gc "statepoint-example" {
; CHECK: getelementptr i32, i32* %base, i32 15
-; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
-; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 9)
+; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 9)
entry:
%ptr = getelementptr i32, i32* %base, i32 15
%ptr2 = getelementptr i32, i32* %base, i32 12
- %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
- %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 8)
- %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 9)
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+ %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 9)
%ret = load i32, i32* %ptr-new
ret i32 %ret
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
-declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32)
-declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32, i32, i32)
+define i32 @test_sor_basic_wrong_order(i32* %base) gc "statepoint-example" {
+; CHECK-LABEL: @test_sor_basic_wrong_order
+; Here we have base relocate inserted after derived. Make sure that we don't
+; produce uses of the relocated base pointer before its definition.
+entry:
+ %ptr = getelementptr i32, i32* %base, i32 15
+ ; CHECK: getelementptr i32, i32* %base, i32 15
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+ %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+ ; CHECK: %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+ ; CHECK-NEXT: getelementptr i32, i32* %base-new, i32 15
+ %ret = load i32, i32* %ptr-new
+ ret i32 %ret
+}
+
+define i32 @test_sor_noop_cross_bb(i1 %external-cond, i32* %base) gc "statepoint-example" {
+; CHECK-LABEL: @test_sor_noop_cross_bb
+; Here base relocate doesn't dominate derived relocate. Make sure that we don't
+; produce undefined use of the relocated base pointer.
+entry:
+ %ptr = getelementptr i32, i32* %base, i32 15
+ ; CHECK: getelementptr i32, i32* %base, i32 15
+ %tok = call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
+ br i1 %external-cond, label %left, label %right
+
+left:
+ %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+ ; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 8)
+ %ret-new = load i32, i32* %ptr-new
+ ret i32 %ret-new
+
+right:
+ %ptr-base = call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+ ; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(token %tok, i32 7, i32 7)
+ %ret-base = load i32, i32* %ptr-base
+ ret i32 %ret-base
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.relocate.p0i32(token, i32, i32)
+declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(token, i32, i32)
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index 7541418b06ec..e65d8b28fe7d 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -1,17 +1,52 @@
; RUN: opt < %s -constprop -S | FileCheck %s
; RUN: opt < %s -constprop -disable-simplify-libcalls -S | FileCheck %s --check-prefix=FNOBUILTIN
+declare double @acos(double)
+declare double @asin(double)
+declare double @atan(double)
+declare double @atan2(double, double)
+declare double @ceil(double)
declare double @cos(double)
-
+declare double @cosh(double)
+declare double @exp(double)
+declare double @exp2(double)
+declare double @fabs(double)
+declare double @floor(double)
+declare double @fmod(double, double)
+declare double @log(double)
+declare double @log10(double)
+declare double @pow(double, double)
declare double @sin(double)
-
+declare double @sinh(double)
+declare double @sqrt(double)
declare double @tan(double)
+declare double @tanh(double)
-declare double @sqrt(double)
-declare double @exp2(double)
+declare float @acosf(float)
+declare float @asinf(float)
+declare float @atanf(float)
+declare float @atan2f(float, float)
+declare float @ceilf(float)
+declare float @cosf(float)
+declare float @coshf(float)
+declare float @expf(float)
+declare float @exp2f(float)
+declare float @fabsf(float)
+declare float @floorf(float)
+declare float @fmodf(float, float)
+declare float @logf(float)
+declare float @log10f(float)
+declare float @powf(float, float)
+declare float @sinf(float)
+declare float @sinhf(float)
+declare float @sqrtf(float)
+declare float @tanf(float)
+declare float @tanhf(float)
define double @T() {
; CHECK-LABEL: @T(
+; FNOBUILTIN-LABEL: @T(
+
; CHECK-NOT: call
; CHECK: ret
%A = call double @cos(double 0.000000e+00)
@@ -22,6 +57,119 @@ define double @T() {
%D = call double @sqrt(double 4.000000e+00)
%c = fadd double %b, %D
+ %slot = alloca double
+ %slotf = alloca float
+; FNOBUILTIN: call
+ %1 = call double @acos(double 1.000000e+00)
+ store double %1, double* %slot
+; FNOBUILTIN: call
+ %2 = call double @asin(double 1.000000e+00)
+ store double %2, double* %slot
+; FNOBUILTIN: call
+ %3 = call double @atan(double 3.000000e+00)
+ store double %3, double* %slot
+; FNOBUILTIN: call
+ %4 = call double @atan2(double 3.000000e+00, double 4.000000e+00)
+ store double %4, double* %slot
+; FNOBUILTIN: call
+ %5 = call double @ceil(double 3.000000e+00)
+ store double %5, double* %slot
+; FNOBUILTIN: call
+ %6 = call double @cosh(double 3.000000e+00)
+ store double %6, double* %slot
+; FNOBUILTIN: call
+ %7 = call double @exp(double 3.000000e+00)
+ store double %7, double* %slot
+; FNOBUILTIN: call
+ %8 = call double @exp2(double 3.000000e+00)
+ store double %8, double* %slot
+; FNOBUILTIN: call
+ %9 = call double @fabs(double 3.000000e+00)
+ store double %9, double* %slot
+; FNOBUILTIN: call
+ %10 = call double @floor(double 3.000000e+00)
+ store double %10, double* %slot
+; FNOBUILTIN: call
+ %11 = call double @fmod(double 3.000000e+00, double 4.000000e+00)
+ store double %11, double* %slot
+; FNOBUILTIN: call
+ %12 = call double @log(double 3.000000e+00)
+ store double %12, double* %slot
+; FNOBUILTIN: call
+ %13 = call double @log10(double 3.000000e+00)
+ store double %13, double* %slot
+; FNOBUILTIN: call
+ %14 = call double @pow(double 3.000000e+00, double 4.000000e+00)
+ store double %14, double* %slot
+; FNOBUILTIN: call
+ %15 = call double @sinh(double 3.000000e+00)
+ store double %15, double* %slot
+; FNOBUILTIN: call
+ %16 = call double @tanh(double 3.000000e+00)
+ store double %16, double* %slot
+; FNOBUILTIN: call
+ %17 = call float @acosf(float 1.000000e+00)
+ store float %17, float* %slotf
+; FNOBUILTIN: call
+ %18 = call float @asinf(float 1.000000e+00)
+ store float %18, float* %slotf
+; FNOBUILTIN: call
+ %19 = call float @atanf(float 3.000000e+00)
+ store float %19, float* %slotf
+; FNOBUILTIN: call
+ %20 = call float @atan2f(float 3.000000e+00, float 4.000000e+00)
+ store float %20, float* %slotf
+; FNOBUILTIN: call
+ %21 = call float @ceilf(float 3.000000e+00)
+ store float %21, float* %slotf
+; FNOBUILTIN: call
+ %22 = call float @cosf(float 3.000000e+00)
+ store float %22, float* %slotf
+; FNOBUILTIN: call
+ %23 = call float @coshf(float 3.000000e+00)
+ store float %23, float* %slotf
+; FNOBUILTIN: call
+ %24 = call float @expf(float 3.000000e+00)
+ store float %24, float* %slotf
+; FNOBUILTIN: call
+ %25 = call float @exp2f(float 3.000000e+00)
+ store float %25, float* %slotf
+; FNOBUILTIN: call
+ %26 = call float @fabsf(float 3.000000e+00)
+ store float %26, float* %slotf
+; FNOBUILTIN: call
+ %27 = call float @floorf(float 3.000000e+00)
+ store float %27, float* %slotf
+; FNOBUILTIN: call
+ %28 = call float @fmodf(float 3.000000e+00, float 4.000000e+00)
+ store float %28, float* %slotf
+; FNOBUILTIN: call
+ %29 = call float @logf(float 3.000000e+00)
+ store float %29, float* %slotf
+; FNOBUILTIN: call
+ %30 = call float @log10f(float 3.000000e+00)
+ store float %30, float* %slotf
+; FNOBUILTIN: call
+ %31 = call float @powf(float 3.000000e+00, float 4.000000e+00)
+ store float %31, float* %slotf
+; FNOBUILTIN: call
+ %32 = call float @sinf(float 3.000000e+00)
+ store float %32, float* %slotf
+; FNOBUILTIN: call
+ %33 = call float @sinhf(float 3.000000e+00)
+ store float %33, float* %slotf
+; FNOBUILTIN: call
+ %34 = call float @sqrtf(float 3.000000e+00)
+ store float %34, float* %slotf
+; FNOBUILTIN: call
+ %35 = call float @tanf(float 3.000000e+00)
+ store float %35, float* %slotf
+; FNOBUILTIN: call
+ %36 = call float @tanhf(float 3.000000e+00)
+ store float %36, float* %slotf
+
+; FNOBUILTIN: ret
+
; PR9315
%E = call double @exp2(double 4.0)
%d = fadd double %c, %E
@@ -65,85 +213,9 @@ define double @test_intrinsic_pow() nounwind uwtable ssp {
entry:
; CHECK-LABEL: @test_intrinsic_pow(
; CHECK-NOT: call
+; CHECK: ret
%0 = call double @llvm.pow.f64(double 1.500000e+00, double 3.000000e+00)
ret double %0
}
-declare double @llvm.pow.f64(double, double) nounwind readonly
-
-; Shouldn't fold because of -fno-builtin
-define double @sin_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @sin_(
-; FNOBUILTIN: %1 = call double @sin(double 3.000000e+00)
- %1 = call double @sin(double 3.000000e+00)
- ret double %1
-}
-
-; Shouldn't fold because of -fno-builtin
-define double @sqrt_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @sqrt_(
-; FNOBUILTIN: %1 = call double @sqrt(double 3.000000e+00)
- %1 = call double @sqrt(double 3.000000e+00)
- ret double %1
-}
-; Shouldn't fold because of -fno-builtin
-define float @sqrtf_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @sqrtf_(
-; FNOBUILTIN: %1 = call float @sqrtf(float 3.000000e+00)
- %1 = call float @sqrtf(float 3.000000e+00)
- ret float %1
-}
-declare float @sqrtf(float)
-
-; Shouldn't fold because of -fno-builtin
-define float @sinf_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @sinf_(
-; FNOBUILTIN: %1 = call float @sinf(float 3.000000e+00)
- %1 = call float @sinf(float 3.000000e+00)
- ret float %1
-}
-declare float @sinf(float)
-
-; Shouldn't fold because of -fno-builtin
-define double @tan_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @tan_(
-; FNOBUILTIN: %1 = call double @tan(double 3.000000e+00)
- %1 = call double @tan(double 3.000000e+00)
- ret double %1
-}
-
-; Shouldn't fold because of -fno-builtin
-define double @tanh_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @tanh_(
-; FNOBUILTIN: %1 = call double @tanh(double 3.000000e+00)
- %1 = call double @tanh(double 3.000000e+00)
- ret double %1
-}
-declare double @tanh(double)
-
-; Shouldn't fold because of -fno-builtin
-define double @pow_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @pow_(
-; FNOBUILTIN: %1 = call double @pow(double 3.000000e+00, double 3.000000e+00)
- %1 = call double @pow(double 3.000000e+00, double 3.000000e+00)
- ret double %1
-}
-declare double @pow(double, double)
-
-; Shouldn't fold because of -fno-builtin
-define double @fmod_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @fmod_(
-; FNOBUILTIN: %1 = call double @fmod(double 3.000000e+00, double 3.000000e+00)
- %1 = call double @fmod(double 3.000000e+00, double 3.000000e+00)
- ret double %1
-}
-declare double @fmod(double, double)
-
-; Shouldn't fold because of -fno-builtin
-define double @atan2_() nounwind uwtable ssp {
-; FNOBUILTIN-LABEL: @atan2_(
-; FNOBUILTIN: %1 = call double @atan2(double 3.000000e+00, double 3.000000e+00)
- %1 = call double @atan2(double 3.000000e+00, double 3.000000e+00)
- ret double %1
-}
-declare double @atan2(double, double)
+declare double @llvm.pow.f64(double, double) nounwind readonly
diff --git a/test/Transforms/ConstProp/insertvalue.ll b/test/Transforms/ConstProp/insertvalue.ll
index dce2b728b93b..606f7ddc679c 100644
--- a/test/Transforms/ConstProp/insertvalue.ll
+++ b/test/Transforms/ConstProp/insertvalue.ll
@@ -74,3 +74,13 @@ define i32 @test-float-Nan() {
; CHECK: @test-float-Nan
; CHECK: ret i32 2139171423
}
+
+define i16 @test-half-Nan() {
+ %A = bitcast i16 32256 to half
+ %B = insertvalue [1 x half] undef, half %A, 0
+ %C = extractvalue [1 x half] %B, 0
+ %D = bitcast half %C to i16
+ ret i16 %D
+; CHECK: @test-half-Nan
+; CHECK: ret i16 32256
+}
diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll
index 5426ad0f8adb..89387ad06ba8 100644
--- a/test/Transforms/ConstProp/loads.ll
+++ b/test/Transforms/ConstProp/loads.ll
@@ -40,13 +40,16 @@ define i16 @test2_addrspacecast() {
%r = load i16, i16 addrspace(1)* addrspacecast(i32* getelementptr ({{i32,i8},i32}, {{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16 addrspace(1)*)
ret i16 %r
+; FIXME: Should be able to load through a constant addrspacecast.
; 0xBEEF
; LE-LABEL: @test2_addrspacecast(
-; LE: ret i16 -16657
+; XLE: ret i16 -16657
+; LE: load i16, i16 addrspace(1)* addrspacecast
; 0xDEAD
; BE-LABEL: @test2_addrspacecast(
-; BE: ret i16 -8531
+; XBE: ret i16 -8531
+; BE: load i16, i16 addrspace(1)* addrspacecast
}
; Load of second 16 bits of 32-bit value.
diff --git a/test/Transforms/ConstantMerge/merge-both.ll b/test/Transforms/ConstantMerge/merge-both.ll
index 11b0621d42d7..514c789b4701 100644
--- a/test/Transforms/ConstantMerge/merge-both.ll
+++ b/test/Transforms/ConstantMerge/merge-both.ll
@@ -25,7 +25,7 @@ declare void @helper([16 x i8]*)
; CHECK-NEXT: @var7 = internal constant [16 x i8] c"foo1bar2foo3bar\00"
; CHECK-NEXT: @var8 = private constant [16 x i8] c"foo1bar2foo3bar\00", align 16
-@var4a = alias %struct.foobar* @var4
+@var4a = alias %struct.foobar, %struct.foobar* @var4
@llvm.used = appending global [1 x %struct.foobar*] [%struct.foobar* @var4a], section "llvm.metadata"
define i32 @main() {
diff --git a/test/Transforms/CorrelatedValuePropagation/non-null.ll b/test/Transforms/CorrelatedValuePropagation/non-null.ll
index 6bb8bb07c45f..6fb4cb6e3582 100644
--- a/test/Transforms/CorrelatedValuePropagation/non-null.ll
+++ b/test/Transforms/CorrelatedValuePropagation/non-null.ll
@@ -101,3 +101,63 @@ bb:
; CHECK: KEEP2
ret void
}
+
+declare void @test10_helper(i8* %arg1, i8* %arg2, i32 %non-pointer-arg)
+define void @test10(i8* %arg1, i8* %arg2, i32 %non-pointer-arg) {
+; CHECK-LABEL: @test10
+entry:
+ %is_null = icmp eq i8* %arg1, null
+ br i1 %is_null, label %null, label %non_null
+
+non_null:
+ call void @test10_helper(i8* %arg1, i8* %arg2, i32 %non-pointer-arg)
+ ; CHECK: call void @test10_helper(i8* nonnull %arg1, i8* %arg2, i32 %non-pointer-arg)
+ br label %null
+
+null:
+ call void @test10_helper(i8* %arg1, i8* %arg2, i32 %non-pointer-arg)
+ ; CHECK: call void @test10_helper(i8* %arg1, i8* %arg2, i32 %non-pointer-arg)
+ ret void
+}
+
+declare void @test11_helper(i8* %arg)
+define void @test11(i8* %arg1, i8** %arg2) {
+; CHECK-LABEL: @test11
+entry:
+ %is_null = icmp eq i8* %arg1, null
+ br i1 %is_null, label %null, label %non_null
+
+non_null:
+ br label %merge
+
+null:
+ %another_arg = alloca i8
+ br label %merge
+
+merge:
+ %merged_arg = phi i8* [%another_arg, %null], [%arg1, %non_null]
+ call void @test11_helper(i8* %merged_arg)
+ ; CHECK: call void @test11_helper(i8* nonnull %merged_arg)
+ ret void
+}
+
+declare void @test12_helper(i8* %arg)
+define void @test12(i8* %arg1, i8** %arg2) {
+; CHECK-LABEL: @test12
+entry:
+ %is_null = icmp eq i8* %arg1, null
+ br i1 %is_null, label %null, label %non_null
+
+non_null:
+ br label %merge
+
+null:
+ %another_arg = load i8*, i8** %arg2, !nonnull !{}
+ br label %merge
+
+merge:
+ %merged_arg = phi i8* [%another_arg, %null], [%arg1, %non_null]
+ call void @test12_helper(i8* %merged_arg)
+ ; CHECK: call void @test12_helper(i8* nonnull %merged_arg)
+ ret void
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/range.ll b/test/Transforms/CorrelatedValuePropagation/range.ll
index e40c63919078..884cc8bdc125 100644
--- a/test/Transforms/CorrelatedValuePropagation/range.ll
+++ b/test/Transforms/CorrelatedValuePropagation/range.ll
@@ -165,3 +165,27 @@ sw.default:
%or2 = or i1 %cmp7, %cmp8
ret i1 false
}
+
+define i1 @test8(i64* %p) {
+; CHECK-LABEL: @test8
+; CHECK: ret i1 false
+ %a = load i64, i64* %p, !range !{i64 4, i64 255}
+ %res = icmp eq i64 %a, 0
+ ret i1 %res
+}
+
+define i1 @test9(i64* %p) {
+; CHECK-LABEL: @test9
+; CHECK: ret i1 true
+ %a = load i64, i64* %p, !range !{i64 0, i64 1}
+ %res = icmp eq i64 %a, 0
+ ret i1 %res
+}
+
+define i1 @test10(i64* %p) {
+; CHECK-LABEL: @test10
+; CHECK: ret i1 false
+ %a = load i64, i64* %p, !range !{i64 4, i64 8, i64 15, i64 20}
+ %res = icmp eq i64 %a, 0
+ ret i1 %res
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/select.ll b/test/Transforms/CorrelatedValuePropagation/select.ll
index d88e3e462a20..be44bdcd921d 100644
--- a/test/Transforms/CorrelatedValuePropagation/select.ll
+++ b/test/Transforms/CorrelatedValuePropagation/select.ll
@@ -71,5 +71,5 @@ for.body:
if.end:
ret i32 %sel
-; CHECK: ret i32 %[[sel]]
+; CHECK: ret i32 1
}
diff --git a/test/Transforms/CrossDSOCFI/basic.ll b/test/Transforms/CrossDSOCFI/basic.ll
new file mode 100644
index 000000000000..49b3e8f23ccf
--- /dev/null
+++ b/test/Transforms/CrossDSOCFI/basic.ll
@@ -0,0 +1,88 @@
+; RUN: opt -S -cross-dso-cfi < %s | FileCheck %s
+
+; CHECK: define void @__cfi_check(i64 %[[TYPE:.*]], i8* %[[ADDR:.*]]) align 4096
+; CHECK: switch i64 %[[TYPE]], label %[[TRAP:.*]] [
+; CHECK-NEXT: i64 111, label %[[L1:.*]]
+; CHECK-NEXT: i64 222, label %[[L2:.*]]
+; CHECK-NEXT: i64 333, label %[[L3:.*]]
+; CHECK-NEXT: i64 444, label %[[L4:.*]]
+; CHECK-NEXT: {{]$}}
+
+; CHECK: [[TRAP]]:
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: unreachable
+
+; CHECK: [[EXIT:.*]]:
+; CHECK-NEXT: ret void
+
+; CHECK: [[L1]]:
+; CHECK-NEXT: call i1 @llvm.bitset.test(i8* %[[ADDR]], metadata i64 111)
+; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[TRAP]]
+
+; CHECK: [[L2]]:
+; CHECK-NEXT: call i1 @llvm.bitset.test(i8* %[[ADDR]], metadata i64 222)
+; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[TRAP]]
+
+; CHECK: [[L3]]:
+; CHECK-NEXT: call i1 @llvm.bitset.test(i8* %[[ADDR]], metadata i64 333)
+; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[TRAP]]
+
+; CHECK: [[L4]]:
+; CHECK-NEXT: call i1 @llvm.bitset.test(i8* %[[ADDR]], metadata i64 444)
+; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[TRAP]]
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@_ZTV1A = constant i8 0
+@_ZTI1A = constant i8 0
+@_ZTS1A = constant i8 0
+@_ZTV1B = constant i8 0
+@_ZTI1B = constant i8 0
+@_ZTS1B = constant i8 0
+
+define signext i8 @f11() {
+entry:
+ ret i8 1
+}
+
+define signext i8 @f12() {
+entry:
+ ret i8 2
+}
+
+define signext i8 @f13() {
+entry:
+ ret i8 3
+}
+
+define i32 @f21() {
+entry:
+ ret i32 4
+}
+
+define i32 @f22() {
+entry:
+ ret i32 5
+}
+
+!llvm.bitsets = !{!0, !1, !2, !3, !4, !7, !8, !9, !10, !11, !12, !13, !14, !15}
+!llvm.module.flags = !{!17}
+
+!0 = !{!"_ZTSFcvE", i8 ()* @f11, i64 0}
+!1 = !{i64 111, i8 ()* @f11, i64 0}
+!2 = !{!"_ZTSFcvE", i8 ()* @f12, i64 0}
+!3 = !{i64 111, i8 ()* @f12, i64 0}
+!4 = !{!"_ZTSFcvE", i8 ()* @f13, i64 0}
+!5 = !{i64 111, i8 ()* @f13, i64 0}
+!6 = !{!"_ZTSFivE", i32 ()* @f21, i64 0}
+!7 = !{i64 222, i32 ()* @f21, i64 0}
+!8 = !{!"_ZTSFivE", i32 ()* @f22, i64 0}
+!9 = !{i64 222, i32 ()* @f22, i64 0}
+!10 = !{!"_ZTS1A", i8* @_ZTV1A, i64 16}
+!11 = !{i64 333, i8* @_ZTV1A, i64 16}
+!12 = !{!"_ZTS1A", i8* @_ZTV1B, i64 16}
+!13 = !{i64 333, i8* @_ZTV1B, i64 16}
+!14 = !{!"_ZTS1B", i8* @_ZTV1B, i64 16}
+!15 = !{i64 444, i8* @_ZTV1B, i64 16}
+!17= !{i32 4, !"Cross-DSO CFI", i32 1}
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index 5ff05f0d6858..10578761cd73 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -45,30 +45,30 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!30}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "name", line: 8, arg: 0, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "vfs_addname", linkageName: "vfs_addname", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !28, scope: !2, type: !4)
+!0 = !DILocalVariable(name: "name", line: 8, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "vfs_addname", linkageName: "vfs_addname", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !28, scope: !2, type: !4)
!2 = !DIFile(filename: "tail.c", directory: "/Users/echeng/LLVM/radars/r7927803/")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", isOptimized: true, emissionKind: 0, file: !28, enums: !29, retainedTypes: !29)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", isOptimized: true, emissionKind: 0, file: !28, enums: !29, retainedTypes: !29)
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !6, !9, !9, !9}
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !28, scope: !2, baseType: !7)
!7 = !DIDerivedType(tag: DW_TAG_const_type, size: 8, align: 8, file: !28, scope: !2, baseType: !8)
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "len", line: 9, arg: 0, scope: !1, file: !2, type: !9)
-!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "hash", line: 10, arg: 0, scope: !1, file: !2, type: !9)
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "flags", line: 11, arg: 0, scope: !1, file: !2, type: !9)
+!10 = !DILocalVariable(name: "len", line: 9, arg: 2, scope: !1, file: !2, type: !9)
+!11 = !DILocalVariable(name: "hash", line: 10, arg: 3, scope: !1, file: !2, type: !9)
+!12 = !DILocalVariable(name: "flags", line: 11, arg: 4, scope: !1, file: !2, type: !9)
!13 = !DILocation(line: 13, scope: !14)
!14 = distinct !DILexicalBlock(line: 12, column: 0, file: !28, scope: !1)
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "name", line: 17, arg: 0, scope: !16, file: !2, type: !6)
-!16 = !DISubprogram(name: "add_name_internal", linkageName: "add_name_internal", line: 22, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !28, scope: !2, type: !17)
+!15 = !DILocalVariable(name: "name", line: 17, arg: 1, scope: !16, file: !2, type: !6)
+!16 = distinct !DISubprogram(name: "add_name_internal", linkageName: "add_name_internal", line: 22, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !28, scope: !2, type: !17)
!17 = !DISubroutineType(types: !18)
!18 = !{!6, !6, !9, !9, !19, !9}
!19 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "len", line: 18, arg: 0, scope: !16, file: !2, type: !9)
-!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "hash", line: 19, arg: 0, scope: !16, file: !2, type: !9)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "extra", line: 20, arg: 0, scope: !16, file: !2, type: !19)
-!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "flags", line: 21, arg: 0, scope: !16, file: !2, type: !9)
+!20 = !DILocalVariable(name: "len", line: 18, arg: 2, scope: !16, file: !2, type: !9)
+!21 = !DILocalVariable(name: "hash", line: 19, arg: 3, scope: !16, file: !2, type: !9)
+!22 = !DILocalVariable(name: "extra", line: 20, arg: 4, scope: !16, file: !2, type: !19)
+!23 = !DILocalVariable(name: "flags", line: 21, arg: 5, scope: !16, file: !2, type: !9)
!24 = !DILocation(line: 23, scope: !25)
!25 = distinct !DILexicalBlock(line: 22, column: 0, file: !28, scope: !16)
!26 = !DILocation(line: 24, scope: !25)
diff --git a/test/Transforms/DeadArgElim/aggregates.ll b/test/Transforms/DeadArgElim/aggregates.ll
index 68d253425587..2eca76a4a4e3 100644
--- a/test/Transforms/DeadArgElim/aggregates.ll
+++ b/test/Transforms/DeadArgElim/aggregates.ll
@@ -159,4 +159,28 @@ define internal i8 @outer() {
%val = load i8, i8* %resptr
ret i8 %val
-} \ No newline at end of file
+}
+
+define internal { i32 } @agg_ret() {
+entry:
+ unreachable
+}
+
+; CHECK-LABEL: define void @PR24906
+; CHECK: %[[invoke:.*]] = invoke i32 @agg_ret()
+; CHECK: %[[oldret:.*]] = insertvalue { i32 } undef, i32 %[[invoke]], 0
+; CHECK: phi { i32 } [ %[[oldret]],
+define void @PR24906() personality i32 (i32)* undef {
+entry:
+ %tmp2 = invoke { i32 } @agg_ret()
+ to label %bb3 unwind label %bb4
+
+bb3:
+ %tmp3 = phi { i32 } [ %tmp2, %entry ]
+ unreachable
+
+bb4:
+ %tmp4 = landingpad { i8*, i32 }
+ cleanup
+ unreachable
+}
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index efafd9bbc9e2..a19d4b1fd1ab 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -14,20 +14,21 @@
; the function->debug info mapping on update to ensure it's accurate when used
; again for the next removal.
-; CHECK: !DISubprogram(name: "f1",{{.*}} function: void ()* @_ZL2f1iz
+; CHECK: define internal void @_ZL2f1iz({{.*}} !dbg [[SP:![0-9]+]]
+; CHECK: [[SP]] = distinct !DISubprogram(name: "f1"
; Check that debug info metadata for subprograms stores pointers to
; updated LLVM functions.
; Function Attrs: uwtable
-define void @_Z2f2v() #0 {
+define void @_Z2f2v() #0 !dbg !4 {
entry:
call void (i32, ...) @_ZL2f1iz(i32 1), !dbg !15
ret void, !dbg !16
}
; Function Attrs: nounwind uwtable
-define internal void @_ZL2f1iz(i32, ...) #1 {
+define internal void @_ZL2f1iz(i32, ...) #1 !dbg !8 {
entry:
call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !17, metadata !18), !dbg !19
ret void, !dbg !20
@@ -47,15 +48,15 @@ attributes #2 = { nounwind readnone }
!llvm.module.flags = !{!12, !13}
!llvm.ident = !{!14}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "dbg.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4, !8}
-!4 = !DISubprogram(name: "f2", linkageName: "_Z2f2v", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, function: void ()* @_Z2f2v, variables: !2)
+!4 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "dbg.cpp", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
-!8 = !DISubprogram(name: "f1", linkageName: "_ZL2f1iz", line: 1, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !9, function: void (i32, ...)* @_ZL2f1iz, variables: !2)
+!8 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1iz", line: 1, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !9, variables: !2)
!9 = !DISubroutineType(types: !10)
!10 = !{null, !11, null}
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
@@ -64,7 +65,7 @@ attributes #2 = { nounwind readnone }
!14 = !{!"clang version 3.6.0 "}
!15 = !DILocation(line: 5, column: 3, scope: !4)
!16 = !DILocation(line: 6, column: 1, scope: !4)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 1, arg: 1, scope: !8, file: !5, type: !11)
+!17 = !DILocalVariable(name: "", line: 1, arg: 1, scope: !8, file: !5, type: !11)
!18 = !DIExpression()
!19 = !DILocation(line: 1, column: 19, scope: !8)
!20 = !DILocation(line: 2, column: 1, scope: !8)
diff --git a/test/Transforms/DeadArgElim/naked_functions.ll b/test/Transforms/DeadArgElim/naked_functions.ll
new file mode 100644
index 000000000000..b7955a10127d
--- /dev/null
+++ b/test/Transforms/DeadArgElim/naked_functions.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -deadargelim %s | FileCheck %s
+
+; Don't eliminate dead arguments from naked functions.
+; CHECK: define internal i32 @naked(i32 %x)
+
+define internal i32 @naked(i32 %x) #0 {
+ tail call void asm sideeffect inteldialect "mov eax, [esp + $$4]\0A\09ret", "~{eax},~{dirflag},~{fpsr},~{flags}"()
+ unreachable
+}
+
+
+; Don't eliminate dead varargs from naked functions.
+; CHECK: define internal i32 @naked_va(i32 %x, ...)
+
+define internal i32 @naked_va(i32 %x, ...) #0 {
+ tail call void asm sideeffect inteldialect "mov eax, [esp + $$8]\0A\09ret", "~{eax},~{dirflag},~{fpsr},~{flags}"()
+ unreachable
+}
+
+define i32 @f(i32 %x, i32 %y) {
+ %r = call i32 @naked(i32 %x)
+ %s = call i32 (i32, ...) @naked_va(i32 %x, i32 %r)
+
+; Make sure the arguments are still there: not removed or replaced with undef.
+; CHECK: %r = call i32 @naked(i32 %x)
+; CHECK: %s = call i32 (i32, ...) @naked_va(i32 %x, i32 %r)
+
+ ret i32 %s
+}
+
+attributes #0 = { naked }
diff --git a/test/Transforms/DeadArgElim/operandbundle.ll b/test/Transforms/DeadArgElim/operandbundle.ll
new file mode 100644
index 000000000000..aa112b1c0501
--- /dev/null
+++ b/test/Transforms/DeadArgElim/operandbundle.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -deadargelim -S | FileCheck %s
+
+define internal void @f(i32 %arg) {
+entry:
+ call void @g() [ "foo"(i32 %arg) ]
+ ret void
+}
+
+; CHECK-LABEL: define internal void @f(
+; CHECK: call void @g() [ "foo"(i32 %arg) ]
+
+declare void @g()
diff --git a/test/Transforms/DeadStoreElimination/calloc-store.ll b/test/Transforms/DeadStoreElimination/calloc-store.ll
new file mode 100644
index 000000000000..daba61332065
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/calloc-store.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+declare noalias i8* @calloc(i64, i64)
+
+define i32* @test1() {
+; CHECK-LABEL: test1
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ %2 = bitcast i8* %1 to i32*
+ ; This store is dead and should be removed
+ store i32 0, i32* %2, align 4
+; CHECK-NOT: store i32 0, i32* %2, align 4
+ ret i32* %2
+}
+
+define i32* @test2() {
+; CHECK-LABEL: test2
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ %2 = bitcast i8* %1 to i32*
+ %3 = getelementptr i32, i32* %2, i32 5
+ store i32 0, i32* %3, align 4
+; CHECK-NOT: store i32 0, i32* %2, align 4
+ ret i32* %2
+}
+
+define i32* @test3(i32 *%arg) {
+; CHECK-LABEL: test3
+ store i32 0, i32* %arg, align 4
+; CHECK: store i32 0, i32* %arg, align 4
+ ret i32* %arg
+}
+
+declare void @clobber_memory(i8*)
+define i8* @test4() {
+; CHECK-LABEL: test4
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ call void @clobber_memory(i8* %1)
+ store i8 0, i8* %1, align 4
+; CHECK: store i8 0, i8* %1, align 4
+ ret i8* %1
+}
+
+define i32* @test5() {
+; CHECK-LABEL: test5
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ %2 = bitcast i8* %1 to i32*
+ store volatile i32 0, i32* %2, align 4
+; CHECK: store volatile i32 0, i32* %2, align 4
+ ret i32* %2
+}
+
+define i8* @test6() {
+; CHECK-LABEL: test6
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ store i8 5, i8* %1, align 4
+; CHECK: store i8 5, i8* %1, align 4
+ ret i8* %1
+}
+
+define i8* @test7(i8 %arg) {
+; CHECK-LABEL: test7
+ %1 = tail call noalias i8* @calloc(i64 1, i64 4)
+ store i8 %arg, i8* %1, align 4
+; CHECK: store i8 %arg, i8* %1, align 4
+ ret i8* %1
+}
diff --git a/test/Transforms/DeadStoreElimination/inst-limits.ll b/test/Transforms/DeadStoreElimination/inst-limits.ll
index 54e41c8b413b..5848ab89bc88 100644
--- a/test/Transforms/DeadStoreElimination/inst-limits.ll
+++ b/test/Transforms/DeadStoreElimination/inst-limits.ll
@@ -9,7 +9,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@x = global i32 0, align 4
; Function Attrs: nounwind
-define i32 @test_within_limit() {
+define i32 @test_within_limit() !dbg !4 {
entry:
; The first store; later there is a second store to the same location,
; so this store should be optimized away by DSE.
@@ -245,17 +245,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!11, !13}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "/home/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test_within_limit", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, function: i32 ()* @test_within_limit, variables: !2)
+!4 = distinct !DISubprogram(name: "test_within_limit", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "test.c", directory: "/home/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10}
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", scope: !4, type: !8)
+!10 = !DILocalVariable(name: "x", scope: !4, type: !8)
!11 = !{i32 2, !"Dwarf Version", i32 4}
!12 = !{i32* undef}
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index 2ffe0539098e..4f6221db2454 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -350,3 +350,150 @@ define i8* @test25(i8* %p) nounwind {
store i8 %tmp, i8* %p.4, align 1
ret i8* %q
}
+
+; Remove redundant store if loaded value is in another block.
+; CHECK-LABEL: @test26(
+; CHECK-NOT: store
+; CHECK: ret
+define i32 @test26(i1 %c, i32* %p) {
+entry:
+ %v = load i32, i32* %p, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ store i32 %v, i32* %p, align 4
+ br label %bb3
+bb3:
+ ret i32 0
+}
+
+; Remove redundant store if loaded value is in another block.
+; CHECK-LABEL: @test27(
+; CHECK-NOT: store
+; CHECK: ret
+define i32 @test27(i1 %c, i32* %p) {
+entry:
+ %v = load i32, i32* %p, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ br label %bb3
+bb3:
+ store i32 %v, i32* %p, align 4
+ ret i32 0
+}
+
+; Don't remove redundant store because of may-aliased store.
+; CHECK-LABEL: @test28(
+; CHECK: bb3:
+; CHECK-NEXT: store i32 %v
+define i32 @test28(i1 %c, i32* %p, i32* %p2, i32 %i) {
+entry:
+ %v = load i32, i32* %p, align 4
+
+ ; Might overwrite value at %p
+ store i32 %i, i32* %p2, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ br label %bb3
+bb3:
+ store i32 %v, i32* %p, align 4
+ ret i32 0
+}
+
+; Don't remove redundant store because of may-aliased store.
+; CHECK-LABEL: @test29(
+; CHECK: bb3:
+; CHECK-NEXT: store i32 %v
+define i32 @test29(i1 %c, i32* %p, i32* %p2, i32 %i) {
+entry:
+ %v = load i32, i32* %p, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ ; Might overwrite value at %p
+ store i32 %i, i32* %p2, align 4
+ br label %bb3
+bb3:
+ store i32 %v, i32* %p, align 4
+ ret i32 0
+}
+
+declare void @unknown_func()
+
+; Don't remove redundant store because of unknown call.
+; CHECK-LABEL: @test30(
+; CHECK: bb3:
+; CHECK-NEXT: store i32 %v
+define i32 @test30(i1 %c, i32* %p, i32 %i) {
+entry:
+ %v = load i32, i32* %p, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ ; Might overwrite value at %p
+ call void @unknown_func()
+ br label %bb3
+bb3:
+ store i32 %v, i32* %p, align 4
+ ret i32 0
+}
+
+; Remove redundant store if loaded value is in another block inside a loop.
+; CHECK-LABEL: @test31(
+; CHECK-NOT: store
+; CHECK: ret
+define i32 @test31(i1 %c, i32* %p, i32 %i) {
+entry:
+ %v = load i32, i32* %p, align 4
+ br label %bb1
+bb1:
+ store i32 %v, i32* %p, align 4
+ br i1 undef, label %bb1, label %bb2
+bb2:
+ ret i32 0
+}
+
+; Don't remove redundant store in a loop with a may-alias store.
+; CHECK-LABEL: @test32(
+; CHECK: bb1:
+; CHECK-NEXT: store i32 %v
+; CHECK-NEXT: call void @unknown_func
+define i32 @test32(i1 %c, i32* %p, i32 %i) {
+entry:
+ %v = load i32, i32* %p, align 4
+ br label %bb1
+bb1:
+ store i32 %v, i32* %p, align 4
+ ; Might read and overwrite value at %p
+ call void @unknown_func()
+ br i1 undef, label %bb1, label %bb2
+bb2:
+ ret i32 0
+}
+
+; Remove redundant store, which is in the same loop as the load.
+; CHECK-LABEL: @test33(
+; CHECK-NOT: store
+; CHECK: ret
+define i32 @test33(i1 %c, i32* %p, i32 %i) {
+entry:
+ br label %bb1
+bb1:
+ %v = load i32, i32* %p, align 4
+ br label %bb2
+bb2:
+ store i32 %v, i32* %p, align 4
+ ; Might read and overwrite value at %p, but doesn't matter.
+ call void @unknown_func()
+ br i1 undef, label %bb1, label %bb3
+bb3:
+ ret i32 0
+}
+
diff --git a/test/Transforms/EarlyCSE/AArch64/ldstN.ll b/test/Transforms/EarlyCSE/AArch64/ldstN.ll
new file mode 100644
index 000000000000..cc1af31429e1
--- /dev/null
+++ b/test/Transforms/EarlyCSE/AArch64/ldstN.ll
@@ -0,0 +1,18 @@
+; RUN: opt -S -early-cse < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>*)
+
+; Although the store and the ld4 are using the same pointer, the
+; data can not be reused because ld4 accesses multiple elements.
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @foo() {
+entry:
+ store <4 x i16> undef, <4 x i16>* undef, align 8
+ %0 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* undef)
+ ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %0
+; CHECK-LABEL: @foo(
+; CHECK: store
+; CHECK-NEXT: call
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/EarlyCSE/atomics.ll b/test/Transforms/EarlyCSE/atomics.ll
new file mode 100644
index 000000000000..21c19cd8e880
--- /dev/null
+++ b/test/Transforms/EarlyCSE/atomics.ll
@@ -0,0 +1,259 @@
+; RUN: opt < %s -S -early-cse | FileCheck %s
+
+; CHECK-LABEL: @test12(
+define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+ %load0 = load i32, i32* %P1
+ %1 = load atomic i32, i32* %P2 seq_cst, align 4
+ %load1 = load i32, i32* %P1
+ %sel = select i1 %B, i32 %load0, i32 %load1
+ ret i32 %sel
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load i32, i32* %P1
+}
+
+; CHECK-LABEL: @test13(
+; atomic to non-atomic forwarding is legal
+define i32 @test13(i1 %B, i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load i32, i32* %P1
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test14(
+; atomic to unordered atomic forwarding is legal
+define i32 @test14(i1 %B, i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1 seq_cst
+ ; CHECK-NEXT: ret i32 0
+}
+
+; CHECK-LABEL: @test15(
+; implementation restriction: can't forward to stronger
+; than unordered
+define i32 @test15(i1 %B, i32* %P1, i32* %P2) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 seq_cst, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16(
+; forwarding non-atomic to atomic is wrong! (However,
+; it would be legal to use the later value in place of the
+; former in this particular example. We just don't
+; do that right now.)
+define i32 @test16(i1 %B, i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; Can't DSE across a full fence
+define void @fence_seq_cst_store(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst_store
+; CHECK: store
+; CHECK: store atomic
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ store atomic i32 0, i32* %P2 seq_cst, align 4
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_seq_cst(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst
+; CHECK: store
+; CHECK: fence seq_cst
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ fence seq_cst
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_asm_sideeffect(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_sideeffect
+; CHECK: store
+; CHECK: call void asm sideeffect
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ call void asm sideeffect "", ""()
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_asm_memory(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_memory
+; CHECK: store
+; CHECK: call void asm
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ call void asm "", "~{memory}"()
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't remove a volatile load
+define i32 @volatile_load(i1 %B, i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @volatile_load
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+}
+
+; Can't remove redundant volatile loads
+define i32 @redundant_volatile_load(i1 %B, i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @redundant_volatile_load
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: sub
+}
+
+; Can't DSE a volatile store
+define void @volatile_store(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @volatile_store
+; CHECK: store volatile
+; CHECK: store
+ store volatile i32 0, i32* %P1, align 4
+ store i32 3, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE a redundant volatile store
+define void @redundant_volatile_store(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @redundant_volatile_store
+; CHECK: store volatile
+; CHECK: store volatile
+ store volatile i32 0, i32* %P1, align 4
+ store volatile i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can value forward from volatiles
+define i32 @test20(i1 %B, i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @test20
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; Can DSE a non-volatile store in favor of a volatile one
+; currently a missed optimization
+define void @test21(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test21
+; CHECK: store
+; CHECK: store volatile
+ store i32 0, i32* %P1, align 4
+ store volatile i32 3, i32* %P1, align 4
+ ret void
+}
+
+; Can DSE a normal store in favor of an unordered one
+define void @test22(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test22
+; CHECK-NEXT: store atomic
+ store i32 0, i32* %P1, align 4
+ store atomic i32 3, i32* %P1 unordered, align 4
+ ret void
+}
+
+; Can also DSE an unordered store in favor of a normal one
+define void @test23(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test23
+; CHECK-NEXT: store i32 0
+ store atomic i32 3, i32* %P1 unordered, align 4
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; As an implementation limitation, can't remove ordered stores
+; Note that we could remove the earlier store if we could
+; represent the required ordering.
+define void @test24(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test24
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: store i32 0
+ store atomic i32 3, i32* %P1 release, align 4
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't remove volatile stores - each is independently observable and
+; the count of such stores is an observable program side effect.
+define void @test25(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test25
+; CHECK-NEXT: store volatile
+; CHECK-NEXT: store volatile
+ store volatile i32 3, i32* %P1, align 4
+ store volatile i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can DSE an unordered store in favor of an unordered one
+define void @test26(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test26
+; CHECK-NEXT: store atomic i32 3, i32* %P1 unordered, align 4
+; CHECK-NEXT: ret
+ store atomic i32 0, i32* %P1 unordered, align 4
+ store atomic i32 3, i32* %P1 unordered, align 4
+ ret void
+}
+
+; Can DSE an unordered store in favor of an ordered one,
+; but currently don't due to implementation limits
+define void @test27(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test27
+; CHECK-NEXT: store atomic i32 0, i32* %P1 unordered, align 4
+; CHECK-NEXT: store atomic i32 3, i32* %P1 release, align 4
+; CHECK-NEXT: ret
+ store atomic i32 0, i32* %P1 unordered, align 4
+ store atomic i32 3, i32* %P1 release, align 4
+ ret void
+}
+
+; Can DSE an unordered atomic store in favor of an
+; ordered one, but currently don't due to implementation limits
+define void @test28(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test28
+; CHECK-NEXT: store atomic i32 0, i32* %P1 unordered, align 4
+; CHECK-NEXT: store atomic i32 3, i32* %P1 release, align 4
+; CHECK-NEXT: ret
+ store atomic i32 0, i32* %P1 unordered, align 4
+ store atomic i32 3, i32* %P1 release, align 4
+ ret void
+}
+
+; As an implementation limitation, can't remove ordered stores
+; see also: @test24
+define void @test29(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test29
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: store atomic
+ store atomic i32 3, i32* %P1 release, align 4
+ store atomic i32 0, i32* %P1 unordered, align 4
+ ret void
+}
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll
index 43b5e6098f6a..8c9b74b4d0e1 100644
--- a/test/Transforms/EarlyCSE/basic.ll
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -203,3 +203,77 @@ define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
; CHECK: load i32, i32* %P1
; CHECK: load i32, i32* %P1
}
+
+define void @dse1(i32 *%P) {
+; CHECK-LABEL: @dse1
+; CHECK-NOT: store
+ %v = load i32, i32* %P
+ store i32 %v, i32* %P
+ ret void
+}
+
+define void @dse2(i32 *%P) {
+; CHECK-LABEL: @dse2
+; CHECK-NOT: store
+ %v = load atomic i32, i32* %P seq_cst, align 4
+ store i32 %v, i32* %P
+ ret void
+}
+
+define void @dse3(i32 *%P) {
+; CHECK-LABEL: @dse3
+; CHECK-NOT: store
+ %v = load atomic i32, i32* %P seq_cst, align 4
+ store atomic i32 %v, i32* %P unordered, align 4
+ ret void
+}
+
+define i32 @dse4(i32 *%P, i32 *%Q) {
+; CHECK-LABEL: @dse4
+; CHECK-NOT: store
+; CHECK: ret i32 0
+ %a = load i32, i32* %Q
+ %v = load atomic i32, i32* %P unordered, align 4
+ store atomic i32 %v, i32* %P unordered, align 4
+ %b = load i32, i32* %Q
+ %res = sub i32 %a, %b
+ ret i32 %res
+}
+
+; Note that in this example, %P and %Q could in fact be the same
+; pointer. %v could be different than the value observed for %a
+; and that's okay because we're using relaxed memory ordering.
+; The only guarantee we have to provide is that each of the loads
+; has to observe some value written to that location. We do
+; not have to respect the order in which those writes were done.
+define i32 @dse5(i32 *%P, i32 *%Q) {
+; CHECK-LABEL: @dse5
+; CHECK-NOT: store
+; CHECK: ret i32 0
+ %v = load atomic i32, i32* %P unordered, align 4
+ %a = load atomic i32, i32* %Q unordered, align 4
+ store atomic i32 %v, i32* %P unordered, align 4
+ %b = load atomic i32, i32* %Q unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+}
+
+
+define void @dse_neg1(i32 *%P) {
+; CHECK-LABEL: @dse_neg1
+; CHECK: store
+ %v = load i32, i32* %P
+ store i32 5, i32* %P
+ ret void
+}
+
+; Could remove the store, but only if ordering was somehow
+; encoded.
+define void @dse_neg2(i32 *%P) {
+; CHECK-LABEL: @dse_neg2
+; CHECK: store
+ %v = load i32, i32* %P
+ store atomic i32 %v, i32* %P seq_cst, align 4
+ ret void
+}
+
diff --git a/test/Transforms/EarlyCSE/fence.ll b/test/Transforms/EarlyCSE/fence.ll
new file mode 100644
index 000000000000..c6d47e9fb22e
--- /dev/null
+++ b/test/Transforms/EarlyCSE/fence.ll
@@ -0,0 +1,86 @@
+; RUN: opt -S -early-cse < %s | FileCheck %s
+; NOTE: This file is testing the current implementation. Some of
+; the transforms used as negative tests below would be legal, but
+; only if reached through a chain of logic which EarlyCSE is incapable
+; of performing. To say it differently, this file tests a conservative
+; version of the memory model. If we want to extend EarlyCSE to be more
+; aggressive in the future, we may need to relax some of the negative tests.
+
+; We can value forward across the fence since we can (semantically)
+; reorder the following load before the fence.
+define i32 @test(i32* %addr.i) {
+; CHECK-LABEL: @test
+; CHECK: store
+; CHECK: fence
+; CHECK-NOT: load
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence release
+ %a = load i32, i32* %addr.i, align 4
+ ret i32 %a
+}
+
+; Same as above
+define i32 @test2(i32* noalias %addr.i, i32* noalias %otheraddr) {
+; CHECK-LABEL: @test2
+; CHECK: load
+; CHECK: fence
+; CHECK-NOT: load
+; CHECK: ret
+ %a = load i32, i32* %addr.i, align 4
+ fence release
+ %a2 = load i32, i32* %addr.i, align 4
+ %res = sub i32 %a, %a2
+ ret i32 %a
+}
+
+; We can not value forward across an acquire barrier since we might
+; be synchronizing with another thread storing to the same variable
+; followed by a release fence. If this thread observed the release
+; had happened, we must present a consistent view of memory at the
+; fence. Note that it would be legal to reorder '%a' after the fence
+; and then remove '%a2'. The current implementation doesn't know how
+; to do this, but if it learned, this test will need to be revised.
+define i32 @test3(i32* noalias %addr.i, i32* noalias %otheraddr) {
+; CHECK-LABEL: @test3
+; CHECK: load
+; CHECK: fence
+; CHECK: load
+; CHECK: sub
+; CHECK: ret
+ %a = load i32, i32* %addr.i, align 4
+ fence acquire
+ %a2 = load i32, i32* %addr.i, align 4
+ %res = sub i32 %a, %a2
+ ret i32 %res
+}
+
+; We can not dead store eliminate across the fence. We could in
+; principle reorder the second store above the fence and then DSE either
+; store, but this is beyond the simple last-store DSE which EarlyCSE
+; implements.
+define void @test4(i32* %addr.i) {
+; CHECK-LABEL: @test4
+; CHECK: store
+; CHECK: fence
+; CHECK: store
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence release
+ store i32 5, i32* %addr.i, align 4
+ ret void
+}
+
+; We *could* DSE across this fence, but don't. No other thread can
+; observe the order of the acquire fence and the store.
+define void @test5(i32* %addr.i) {
+; CHECK-LABEL: @test5
+; CHECK: store
+; CHECK: fence
+; CHECK: store
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence acquire
+ store i32 5, i32* %addr.i, align 4
+ ret void
+}
diff --git a/test/Transforms/Float2Int/basic.ll b/test/Transforms/Float2Int/basic.ll
index f4d946914cd4..7f04a594dc80 100644
--- a/test/Transforms/Float2Int/basic.ll
+++ b/test/Transforms/Float2Int/basic.ll
@@ -254,3 +254,13 @@ define i32 @neg_calluser(i32 %value) {
ret i32 %7
}
declare double @g(double)
+
+; CHECK-LABEL: @neg_vector
+; CHECK: %1 = uitofp <4 x i8> %a to <4 x float>
+; CHECK: %2 = fptoui <4 x float> %1 to <4 x i16>
+; CHECK: ret <4 x i16> %2
+define <4 x i16> @neg_vector(<4 x i8> %a) {
+ %1 = uitofp <4 x i8> %a to <4 x float>
+ %2 = fptoui <4 x float> %1 to <4 x i16>
+ ret <4 x i16> %2
+}
diff --git a/test/Transforms/ForcedFunctionAttrs/forced.ll b/test/Transforms/ForcedFunctionAttrs/forced.ll
new file mode 100644
index 000000000000..a41e9c0efbe4
--- /dev/null
+++ b/test/Transforms/ForcedFunctionAttrs/forced.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -S -forceattrs | FileCheck %s --check-prefix=CHECK-CONTROL
+; RUN: opt < %s -S -forceattrs -force-attribute foo:noinline | FileCheck %s --check-prefix=CHECK-FOO
+; RUN: opt < %s -S -passes=forceattrs -force-attribute foo:noinline | FileCheck %s --check-prefix=CHECK-FOO
+
+; CHECK-CONTROL: define void @foo() {
+; CHECK-FOO: define void @foo() #0 {
+define void @foo() {
+ ret void
+}
+
+
+; CHECK-FOO: attributes #0 = { noinline }
diff --git a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
index ca05d63743b5..b62698a776fb 100644
--- a/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
@@ -10,15 +10,16 @@ define i32 @f() {
ret i32 %tmp
}
-; CHECK: define i32 @g() #0
+; CHECK: define i32 @g() #1
define i32 @g() readonly {
ret i32 0
}
-; CHECK: define i32 @h() #0
+; CHECK: define i32 @h() #1
define i32 @h() readnone {
%tmp = load i32, i32* @x ; <i32> [#uses=1]
ret i32 %tmp
}
; CHECK: attributes #0 = { readnone }
+; CHECK: attributes #1 = { norecurse readnone }
diff --git a/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll b/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
deleted file mode 100644
index fa06cc718a93..000000000000
--- a/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: opt < %s -functionattrs -S | FileCheck %s
-
-; CHECK: declare noalias i8* @fopen(i8* nocapture readonly, i8* nocapture readonly) #0
-declare i8* @fopen(i8*, i8*)
-
-; CHECK: declare i8 @strlen(i8* nocapture) #1
-declare i8 @strlen(i8*)
-
-; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) #0
-declare i32* @realloc(i32*, i32)
-
-; Test deliberately wrong declaration
-declare i32 @strcpy(...)
-
-; CHECK-NOT: strcpy{{.*}}noalias
-; CHECK-NOT: strcpy{{.*}}nocapture
-; CHECK-NOT: strcpy{{.*}}nounwind
-; CHECK-NOT: strcpy{{.*}}readonly
-
-; CHECK: attributes #0 = { nounwind }
-; CHECK: attributes #1 = { nounwind readonly }
diff --git a/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll b/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
index 1a64a8393804..23bb18e92b4c 100644
--- a/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
+++ b/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
@@ -4,7 +4,9 @@
@g = constant i32 1
define void @foo() {
-; CHECK: void @foo() {
+; CHECK: void @foo() #0 {
%tmp = load volatile i32, i32* @g
ret void
}
+
+; CHECK: attributes #0 = { norecurse }
diff --git a/test/Transforms/FunctionAttrs/atomic.ll b/test/Transforms/FunctionAttrs/atomic.ll
index bb867011cc2a..dd915a6027f2 100644
--- a/test/Transforms/FunctionAttrs/atomic.ll
+++ b/test/Transforms/FunctionAttrs/atomic.ll
@@ -19,5 +19,5 @@ entry:
ret i32 %r
}
-; CHECK: attributes #0 = { readnone ssp uwtable }
-; CHECK: attributes #1 = { ssp uwtable }
+; CHECK: attributes #0 = { norecurse readnone ssp uwtable }
+; CHECK: attributes #1 = { norecurse ssp uwtable }
diff --git a/test/Transforms/FunctionAttrs/nonnull.ll b/test/Transforms/FunctionAttrs/nonnull.ll
new file mode 100644
index 000000000000..1fb64b7434ab
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/nonnull.ll
@@ -0,0 +1,74 @@
+; RUN: opt -S -functionattrs %s | FileCheck %s
+declare nonnull i8* @ret_nonnull()
+
+; Return a pointer trivially nonnull (call return attribute)
+define i8* @test1() {
+; CHECK: define nonnull i8* @test1
+ %ret = call i8* @ret_nonnull()
+ ret i8* %ret
+}
+
+; Return a pointer trivially nonnull (argument attribute)
+define i8* @test2(i8* nonnull %p) {
+; CHECK: define nonnull i8* @test2
+ ret i8* %p
+}
+
+; Given an SCC where one of the functions cannot be marked nonnull,
+; can we still mark the other one, which is trivially nonnull?
+define i8* @scc_binder() {
+; CHECK: define i8* @scc_binder
+ call i8* @test3()
+ ret i8* null
+}
+
+define i8* @test3() {
+; CHECK: define nonnull i8* @test3
+ call i8* @scc_binder()
+ %ret = call i8* @ret_nonnull()
+ ret i8* %ret
+}
+
+; Given a mutually recursive set of functions, we can mark them
+; nonnull if neither can ever return null. (In this case, they
+; just never return, period.)
+define i8* @test4_helper() {
+; CHECK: define noalias nonnull i8* @test4_helper
+ %ret = call i8* @test4()
+ ret i8* %ret
+}
+
+define i8* @test4() {
+; CHECK: define noalias nonnull i8* @test4
+ %ret = call i8* @test4_helper()
+ ret i8* %ret
+}
+
+; Given a mutually recursive set of functions which *can* return null,
+; make sure we haven't marked them as nonnull.
+define i8* @test5_helper() {
+; CHECK: define noalias i8* @test5_helper
+ %ret = call i8* @test5()
+ ret i8* null
+}
+
+define i8* @test5() {
+; CHECK: define noalias i8* @test5
+ %ret = call i8* @test5_helper()
+ ret i8* %ret
+}
+
+; Local analysis, but going through a self-recursive phi
+define i8* @test6() {
+entry:
+; CHECK: define nonnull i8* @test6
+ %ret = call i8* @ret_nonnull()
+ br label %loop
+loop:
+ %phi = phi i8* [%ret, %entry], [%phi, %loop]
+ br i1 undef, label %loop, label %exit
+exit:
+ ret i8* %phi
+}
+
+
diff --git a/test/Transforms/FunctionAttrs/norecurse.ll b/test/Transforms/FunctionAttrs/norecurse.ll
new file mode 100644
index 000000000000..47481191d278
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/norecurse.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
+
+; CHECK: define i32 @leaf() #0
+define i32 @leaf() {
+ ret i32 1
+}
+
+; CHECK: define i32 @self_rec() #1
+define i32 @self_rec() {
+ %a = call i32 @self_rec()
+ ret i32 4
+}
+
+; CHECK: define i32 @indirect_rec() #1
+define i32 @indirect_rec() {
+ %a = call i32 @indirect_rec2()
+ ret i32 %a
+}
+; CHECK: define i32 @indirect_rec2() #1
+define i32 @indirect_rec2() {
+ %a = call i32 @indirect_rec()
+ ret i32 %a
+}
+
+; CHECK: define i32 @extern() #1
+define i32 @extern() {
+ %a = call i32 @k()
+ ret i32 %a
+}
+declare i32 @k() readnone
+
+; CHECK: define internal i32 @called_by_norecurse() #0
+define internal i32 @called_by_norecurse() {
+ %a = call i32 @k()
+ ret i32 %a
+}
+define void @m() norecurse {
+ %a = call i32 @called_by_norecurse()
+ ret void
+}
+
+; CHECK: define internal i32 @called_by_norecurse_indirectly() #0
+define internal i32 @called_by_norecurse_indirectly() {
+ %a = call i32 @k()
+ ret i32 %a
+}
+define internal void @o() {
+ %a = call i32 @called_by_norecurse_indirectly()
+ ret void
+}
+define void @p() norecurse {
+ call void @o()
+ ret void
+}
+
+; CHECK: attributes #0 = { norecurse readnone }
+; CHECK: attributes #1 = { readnone }
diff --git a/test/Transforms/FunctionAttrs/optnone.ll b/test/Transforms/FunctionAttrs/optnone.ll
index 7694bfe13aa5..441ff4da65ec 100644
--- a/test/Transforms/FunctionAttrs/optnone.ll
+++ b/test/Transforms/FunctionAttrs/optnone.ll
@@ -16,9 +16,11 @@ define void @test_optnone(i8* %p) noinline optnone {
declare i8 @strlen(i8*) noinline optnone
; CHECK-LABEL: @strlen
-; CHECK: (i8*) #1
+; CHECK: (i8*) #2
; CHECK-LABEL: attributes #0
-; CHECK: = { readnone }
+; CHECK: = { norecurse readnone }
; CHECK-LABEL: attributes #1
+; CHECK: = { noinline norecurse optnone }
+; CHECK-LABEL: attributes #2
; CHECK: = { noinline optnone }
diff --git a/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll b/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll
new file mode 100644
index 000000000000..db9a895f97ea
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll
@@ -0,0 +1,30 @@
+; RUN: opt -functionattrs -S < %s | FileCheck %s
+
+; This checks for an iterator wraparound bug in FunctionAttrs. The previous
+; "incorrect" behavior was inferring readonly for the %x argument in @caller.
+; Inferring readonly for %x *is* actually correct, since @va_func is marked
+; readonly, but FunctionAttrs was inferring readonly for the wrong reasons (and
+; we _need_ the readonly on @va_func to trigger the problematic code path). It
+; is possible that in the future FunctionAttrs becomes smart enough to infer
+; readonly for %x for the right reasons, and at that point this test will have
+; to be marked invalid.
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
+
+define void @va_func(i32* readonly %b, ...) readonly nounwind {
+; CHECK-LABEL: define void @va_func(i32* nocapture readonly %b, ...)
+ entry:
+ %valist = alloca i8
+ call void @llvm.va_start(i8* %valist)
+ call void @llvm.va_end(i8* %valist)
+ %x = call i32 @caller(i32* %b)
+ ret void
+}
+
+define i32 @caller(i32* %x) {
+; CHECK-LABEL: define i32 @caller(i32* nocapture %x)
+ entry:
+ call void(i32*,...) @va_func(i32* null, i32 0, i32 0, i32 0, i32* %x)
+ ret i32 42
+}
diff --git a/test/Transforms/FunctionAttrs/readattrs.ll b/test/Transforms/FunctionAttrs/readattrs.ll
index 7f22e6f2a2c5..aabdfe8d2005 100644
--- a/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/test/Transforms/FunctionAttrs/readattrs.ll
@@ -65,3 +65,41 @@ entry:
store i32 10, i32* %call, align 4
ret void
}
+
+; CHECK: declare void @llvm.masked.scatter
+declare void @llvm.masked.scatter.v4i32(<4 x i32>%val, <4 x i32*>, i32, <4 x i1>)
+
+; CHECK-NOT: readnone
+; CHECK-NOT: readonly
+; CHECK: define void @test9
+define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) {
+ call void @llvm.masked.scatter.v4i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>)
+ ret void
+}
+
+; CHECK: declare <4 x i32> @llvm.masked.gather
+declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
+; CHECK: readonly
+; CHECK: define <4 x i32> @test10
+define <4 x i32> @test10(<4 x i32*> %ptrs) {
+ %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>, <4 x i32>undef)
+ ret <4 x i32> %res
+}
+
+; CHECK: declare <4 x i32> @test11_1
+declare <4 x i32> @test11_1(<4 x i32*>) argmemonly nounwind readonly
+; CHECK: readonly
+; CHECK-NOT: readnone
+; CHECK: define <4 x i32> @test11_2
+define <4 x i32> @test11_2(<4 x i32*> %ptrs) {
+ %res = call <4 x i32> @test11_1(<4 x i32*> %ptrs)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @test12_1(<4 x i32*>) argmemonly nounwind
+; CHECK-NOT: readnone
+; CHECK: define <4 x i32> @test12_2
+define <4 x i32> @test12_2(<4 x i32*> %ptrs) {
+ %res = call <4 x i32> @test12_1(<4 x i32*> %ptrs)
+ ret <4 x i32> %res
+}
diff --git a/test/Transforms/FunctionImport/Inputs/funcimport.ll b/test/Transforms/FunctionImport/Inputs/funcimport.ll
new file mode 100644
index 000000000000..96555892fe3c
--- /dev/null
+++ b/test/Transforms/FunctionImport/Inputs/funcimport.ll
@@ -0,0 +1,87 @@
+@globalvar = global i32 1, align 4
+@staticvar = internal global i32 1, align 4
+@staticconstvar = internal unnamed_addr constant [2 x i32] [i32 10, i32 20], align 4
+@commonvar = common global i32 0, align 4
+@P = internal global void ()* null, align 8
+
+@weakalias = weak alias void (...), bitcast (void ()* @globalfunc1 to void (...)*)
+@analias = alias void (...), bitcast (void ()* @globalfunc2 to void (...)*)
+@linkoncealias = alias void (...), bitcast (void ()* @linkoncefunc to void (...)*)
+
+define void @globalfunc1() #0 {
+entry:
+ ret void
+}
+
+define void @globalfunc2() #0 {
+entry:
+ ret void
+}
+
+define linkonce_odr void @linkoncefunc() #0 {
+entry:
+ ret void
+}
+
+define i32 @referencestatics(i32 %i) #0 {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %call = call i32 @staticfunc()
+ %0 = load i32, i32* @staticvar, align 4
+ %add = add nsw i32 %call, %0
+ %1 = load i32, i32* %i.addr, align 4
+ %idxprom = sext i32 %1 to i64
+ %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* @staticconstvar, i64 0, i64 %idxprom
+ %2 = load i32, i32* %arrayidx, align 4
+ %add1 = add nsw i32 %add, %2
+ ret i32 %add1
+}
+
+define i32 @referenceglobals(i32 %i) #0 {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ call void @globalfunc1()
+ %0 = load i32, i32* @globalvar, align 4
+ ret i32 %0
+}
+
+define i32 @referencecommon(i32 %i) #0 {
+entry:
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32, i32* @commonvar, align 4
+ ret i32 %0
+}
+
+define void @setfuncptr() #0 {
+entry:
+ store void ()* @staticfunc2, void ()** @P, align 8
+ ret void
+}
+
+define void @callfuncptr() #0 {
+entry:
+ %0 = load void ()*, void ()** @P, align 8
+ call void %0()
+ ret void
+}
+
+@weakvar = weak global i32 1, align 4
+define weak void @weakfunc() #0 {
+entry:
+ ret void
+}
+
+define internal i32 @staticfunc() #0 {
+entry:
+ ret i32 1
+}
+
+define internal void @staticfunc2() #0 {
+entry:
+ ret void
+}
+
+
diff --git a/test/Transforms/FunctionImport/Inputs/funcimport_debug.ll b/test/Transforms/FunctionImport/Inputs/funcimport_debug.ll
new file mode 100644
index 000000000000..35c62a262903
--- /dev/null
+++ b/test/Transforms/FunctionImport/Inputs/funcimport_debug.ll
@@ -0,0 +1,27 @@
+; ModuleID = 'funcimport_debug.o'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @func() #0 !dbg !4 {
+entry:
+ ret void, !dbg !10
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 255685) (llvm/trunk 255682)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "funcimport_debug.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 255685) (llvm/trunk 255682)"}
+!10 = !DILocation(line: 2, column: 1, scope: !4)
diff --git a/test/Transforms/FunctionImport/funcimport.ll b/test/Transforms/FunctionImport/funcimport.ll
new file mode 100644
index 000000000000..c099b9766477
--- /dev/null
+++ b/test/Transforms/FunctionImport/funcimport.ll
@@ -0,0 +1,75 @@
+; Do setup work for all below tests: generate bitcode and combined index
+; RUN: llvm-as -function-summary %s -o %t.bc
+; RUN: llvm-as -function-summary %p/Inputs/funcimport.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; Do the import now
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %s -S | FileCheck %s --check-prefix=CHECK --check-prefix=INSTLIMDEF
+
+; Test import with smaller instruction limit
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %s -import-instr-limit=5 -S | FileCheck %s --check-prefix=CHECK --check-prefix=INSTLIM5
+; INSTLIM5-NOT: @staticfunc.llvm.2
+
+define i32 @main() #0 {
+entry:
+ call void (...) @weakalias()
+ call void (...) @analias()
+ call void (...) @linkoncealias()
+ %call = call i32 (...) @referencestatics()
+ %call1 = call i32 (...) @referenceglobals()
+ %call2 = call i32 (...) @referencecommon()
+ call void (...) @setfuncptr()
+ call void (...) @callfuncptr()
+ call void (...) @weakfunc()
+ ret i32 0
+}
+
+; Won't import weak alias
+; CHECK-DAG: declare void @weakalias
+declare void @weakalias(...) #1
+
+; Cannot create an alias to available_externally
+; CHECK-DAG: declare void @analias
+declare void @analias(...) #1
+
+; Aliases import the aliasee function
+declare void @linkoncealias(...) #1
+; CHECK-DAG: define linkonce_odr void @linkoncefunc()
+; CHECK-DAG: @linkoncealias = alias void (...), bitcast (void ()* @linkoncefunc to void (...)*
+
+; INSTLIMDEF-DAG: define available_externally i32 @referencestatics(i32 %i)
+; INSTLIM5-DAG: declare i32 @referencestatics(...)
+declare i32 @referencestatics(...) #1
+
+; The import of referencestatics will expose a call to staticfunc, which
+; should in turn be imported as a promoted/renamed and hidden function.
+; Ensure that the call is to the properly-renamed function.
+; INSTLIMDEF-DAG: %call = call i32 @staticfunc.llvm.2()
+; INSTLIMDEF-DAG: define available_externally hidden i32 @staticfunc.llvm.2()
+
+; CHECK-DAG: define available_externally i32 @referenceglobals(i32 %i)
+declare i32 @referenceglobals(...) #1
+
+; The import of referenceglobals will expose a call to globalfunc1, which
+; should in turn be imported.
+; CHECK-DAG: define available_externally void @globalfunc1()
+
+; CHECK-DAG: define available_externally i32 @referencecommon(i32 %i)
+declare i32 @referencecommon(...) #1
+
+; CHECK-DAG: define available_externally void @setfuncptr()
+declare void @setfuncptr(...) #1
+
+; CHECK-DAG: define available_externally void @callfuncptr()
+declare void @callfuncptr(...) #1
+
+; Ensure that all uses of local variable @P, which is used in setfuncptr
+; and callfuncptr, are to the same promoted/renamed global.
+; CHECK-DAG: @P.llvm.2 = available_externally hidden global void ()* null
+; CHECK-DAG: %0 = load void ()*, void ()** @P.llvm.2,
+; CHECK-DAG: store void ()* @staticfunc2.llvm.2, void ()** @P.llvm.2,
+
+; Won't import weak func
+; CHECK-DAG: declare void @weakfunc(...)
+declare void @weakfunc(...) #1
+
diff --git a/test/Transforms/FunctionImport/funcimport_debug.ll b/test/Transforms/FunctionImport/funcimport_debug.ll
new file mode 100644
index 000000000000..c57b5e14af1b
--- /dev/null
+++ b/test/Transforms/FunctionImport/funcimport_debug.ll
@@ -0,0 +1,45 @@
+; Do setup work for all below tests: generate bitcode and combined index
+; RUN: llvm-as -function-summary %s -o %t.bc
+; RUN: llvm-as -function-summary %p/Inputs/funcimport_debug.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; Do the import now and confirm that metadata is linked for imported function.
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %s -S | FileCheck %s
+
+; CHECK: define available_externally void @func()
+; CHECK: distinct !DISubprogram(name: "main"
+; CHECK: distinct !DISubprogram(name: "func"
+
+; ModuleID = 'funcimport_debug.o'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 !dbg !4 {
+entry:
+ call void (...) @func(), !dbg !11
+ ret i32 0, !dbg !12
+}
+
+declare void @func(...) #1
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 255685) (llvm/trunk 255682)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "funcimport_debug.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 255685) (llvm/trunk 255682)"}
+!11 = !DILocation(line: 3, column: 3, scope: !4)
+!12 = !DILocation(line: 4, column: 1, scope: !4)
diff --git a/test/Transforms/GCOVProfiling/function-numbering.ll b/test/Transforms/GCOVProfiling/function-numbering.ll
index 5a704e4d047b..f94d5ad30bbc 100644
--- a/test/Transforms/GCOVProfiling/function-numbering.ll
+++ b/test/Transforms/GCOVProfiling/function-numbering.ll
@@ -22,16 +22,16 @@ target triple = "x86_64-apple-macosx10.10.0"
; GCNO-NOT: == bar ({{[0-9]+}}) @
; GCNO: == baz (1) @
-define void @foo() {
+define void @foo() !dbg !4 {
ret void, !dbg !12
}
-define void @bar() {
+define void @bar() !dbg !7 {
; This function is referenced by the debug info, but no lines have locations.
ret void
}
-define void @baz() {
+define void @baz() !dbg !8 {
ret void, !dbg !13
}
@@ -40,15 +40,15 @@ define void @baz() {
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: ".../llvm/test/Transforms/GCOVProfiling/function-numbering.ll", directory: "")
!2 = !{}
!3 = !{!4, !7, !8}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void ()* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: ".../llvm/test/Transforms/GCOVProfiling/function-numbering.ll", directory: "")
!6 = !DISubroutineType(types: !2)
-!7 = !DISubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: void ()* @bar, variables: !2)
-!8 = !DISubprogram(name: "baz", line: 3, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: void ()* @baz, variables: !2)
+!7 = distinct !DISubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!8 = distinct !DISubprogram(name: "baz", line: 3, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!9 = !{i32 2, !"Dwarf Version", i32 2}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.6.0 "}
diff --git a/test/Transforms/GCOVProfiling/global-ctor.ll b/test/Transforms/GCOVProfiling/global-ctor.ll
index 29c46d6c2107..47600c7bfcad 100644
--- a/test/Transforms/GCOVProfiling/global-ctor.ll
+++ b/test/Transforms/GCOVProfiling/global-ctor.ll
@@ -8,7 +8,7 @@
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_global-ctor.ll, i8* null }]
; Function Attrs: nounwind
-define internal void @__cxx_global_var_init() #0 section ".text.startup" {
+define internal void @__cxx_global_var_init() #0 section ".text.startup" !dbg !4 {
entry:
br label %0
@@ -38,15 +38,15 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "
!llvm.gcov = !{!16}
!llvm.ident = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 210217)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 210217)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "/home/nlewycky")
!2 = !{}
!3 = !{!4, !8}
-!4 = !DISubprogram(name: "__cxx_global_var_init", line: 2, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !6, type: !7, function: void ()* @__cxx_global_var_init, variables: !2)
+!4 = distinct !DISubprogram(name: "__cxx_global_var_init", line: 2, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !6, type: !7, variables: !2)
!5 = !DIFile(filename: "global-ctor.ll", directory: "/home/nlewycky")
!6 = !DIFile(filename: "global-ctor.ll", directory: "/home/nlewycky")
!7 = !DISubroutineType(types: !2)
-!8 = !DISubprogram(name: "", linkageName: "_GLOBAL__sub_I_global-ctor.ll", isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial, isOptimized: false, file: !1, scope: !9, type: !7, function: void ()* @_GLOBAL__sub_I_global-ctor.ll, variables: !2)
+!8 = distinct !DISubprogram(name: "", linkageName: "_GLOBAL__sub_I_global-ctor.ll", isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial, isOptimized: false, file: !1, scope: !9, type: !7, variables: !2)
!9 = !DIFile(filename: "<stdin>", directory: "/home/nlewycky")
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/GCOVProfiling/linezero.ll b/test/Transforms/GCOVProfiling/linezero.ll
index 9e172b752d78..e071c4e6dbf7 100644
--- a/test/Transforms/GCOVProfiling/linezero.ll
+++ b/test/Transforms/GCOVProfiling/linezero.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
%struct.vector = type { i8 }
; Function Attrs: nounwind
-define i32 @_Z4testv() #0 {
+define i32 @_Z4testv() #0 !dbg !15 {
entry:
%retval = alloca i32, align 4
%__range = alloca %struct.vector*, align 8
@@ -75,7 +75,7 @@ declare i8* @_ZN6vector3endEv(%struct.vector*) #2
declare void @llvm.trap() #3
; Function Attrs: nounwind
-define void @_Z2f1v() #0 {
+define void @_Z2f1v() #0 !dbg !20 {
entry:
br label %0
@@ -93,7 +93,7 @@ attributes #3 = { noreturn nounwind }
!llvm.gcov = !{!25}
!llvm.ident = !{!26}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 209871)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 209871)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "PATTERN")
!2 = !{}
!3 = !{!4}
@@ -108,29 +108,29 @@ attributes #3 = { noreturn nounwind }
!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS6vector")
!13 = !DISubprogram(name: "end", linkageName: "_ZN6vector3endEv", line: 26, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 26, file: !5, scope: !"_ZTS6vector", type: !8)
!14 = !{!15, !20}
-!15 = !DISubprogram(name: "test", linkageName: "_Z4testv", line: 50, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 50, file: !5, scope: !16, type: !17, function: i32 ()* @_Z4testv, variables: !2)
+!15 = distinct !DISubprogram(name: "test", linkageName: "_Z4testv", line: 50, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 50, file: !5, scope: !16, type: !17, variables: !2)
!16 = !DIFile(filename: "linezero.cc", directory: "PATTERN")
!17 = !DISubroutineType(types: !18)
!18 = !{!19}
!19 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!20 = !DISubprogram(name: "f1", linkageName: "_Z2f1v", line: 54, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 54, file: !5, scope: !16, type: !21, function: void ()* @_Z2f1v, variables: !2)
+!20 = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1v", line: 54, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 54, file: !5, scope: !16, type: !21, variables: !2)
!21 = !DISubroutineType(types: !22)
!22 = !{null}
!23 = !{i32 2, !"Dwarf Version", i32 4}
!24 = !{i32 2, !"Debug Info Version", i32 3}
!25 = !{!"PATTERN/linezero.o", !0}
!26 = !{!"clang version 3.5.0 (trunk 209871)"}
-!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "__range", flags: DIFlagArtificial, scope: !28, type: !29)
+!27 = !DILocalVariable(name: "__range", flags: DIFlagArtificial, scope: !28, type: !29)
!28 = distinct !DILexicalBlock(line: 51, column: 0, file: !5, scope: !15)
!29 = !DIDerivedType(tag: DW_TAG_rvalue_reference_type, baseType: !"_ZTS6vector")
!30 = !DILocation(line: 0, scope: !28)
!31 = !DILocation(line: 51, scope: !28)
-!32 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "__begin", flags: DIFlagArtificial, scope: !28, type: !10)
-!33 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "__end", flags: DIFlagArtificial, scope: !28, type: !10)
+!32 = !DILocalVariable(name: "__begin", flags: DIFlagArtificial, scope: !28, type: !10)
+!33 = !DILocalVariable(name: "__end", flags: DIFlagArtificial, scope: !28, type: !10)
!34 = !DILocation(line: 51, scope: !35)
!35 = distinct !DILexicalBlock(line: 51, column: 0, file: !5, scope: !36)
!36 = distinct !DILexicalBlock(line: 51, column: 0, file: !5, scope: !28)
-!37 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "spec", line: 51, scope: !28, file: !16, type: !11)
+!37 = !DILocalVariable(name: "spec", line: 51, scope: !28, file: !16, type: !11)
!38 = !DILocation(line: 51, scope: !39)
!39 = distinct !DILexicalBlock(line: 51, column: 0, file: !5, scope: !28)
!40 = !DILocation(line: 51, scope: !41)
diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll
index 7a4119802c9a..65830bf78025 100644
--- a/test/Transforms/GCOVProfiling/linkagename.ll
+++ b/test/Transforms/GCOVProfiling/linkagename.ll
@@ -4,7 +4,7 @@
; RUN: grep _Z3foov %T/linkagename.gcno
; RUN: rm %T/linkagename.gcno
-define void @_Z3foov() {
+define void @_Z3foov() !dbg !5 {
entry:
ret void, !dbg !8
}
@@ -13,12 +13,12 @@ entry:
!llvm.module.flags = !{!10}
!llvm.gcov = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 177323)", isOptimized: false, emissionKind: 0, file: !2, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3, imports: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 177323)", isOptimized: false, emissionKind: 0, file: !2, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3, imports: !3)
!1 = !DIFile(filename: "hello.cc", directory: "/home/nlewycky")
!2 = !DIFile(filename: "hello.cc", directory: "/home/nlewycky")
!3 = !{}
!4 = !{!5}
-!5 = !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !1, type: !6, function: void ()* @_Z3foov, variables: !3)
+!5 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !1, type: !6, variables: !3)
!6 = !DISubroutineType(types: !7)
!7 = !{null}
!8 = !DILocation(line: 1, scope: !5)
diff --git a/test/Transforms/GCOVProfiling/return-block.ll b/test/Transforms/GCOVProfiling/return-block.ll
index 38b5b75e3c2d..9b502a14bfa2 100644
--- a/test/Transforms/GCOVProfiling/return-block.ll
+++ b/test/Transforms/GCOVProfiling/return-block.ll
@@ -16,7 +16,7 @@ target triple = "x86_64-unknown-linux-gnu"
@A = common global i32 0, align 4
; Function Attrs: nounwind uwtable
-define void @test() #0 {
+define void @test() #0 !dbg !4 {
entry:
tail call void (...) @f() #2, !dbg !14
%0 = load i32, i32* @A, align 4, !dbg !15
@@ -44,11 +44,11 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!11, !12}
!llvm.ident = !{!13}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk 223182)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !8, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk 223182)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !8, imports: !2)
!1 = !DIFile(filename: ".../llvm/test/Transforms/GCOVProfiling/return-block.ll", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test", line: 5, isLocal: false, isDefinition: true, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !6, function: void ()* @test, variables: !2)
+!4 = distinct !DISubprogram(name: "test", line: 5, isLocal: false, isDefinition: true, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: ".../llvm/test/Transforms/GCOVProfiling/return-block.ll", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll
index 487e72123cb0..67bfb3c97612 100644
--- a/test/Transforms/GCOVProfiling/version.ll
+++ b/test/Transforms/GCOVProfiling/version.ll
@@ -8,7 +8,7 @@
; RUN: head -c8 %T/version.gcno | grep '^oncg.704'
; RUN: rm %T/version.gcno
-define void @test() {
+define void @test() !dbg !5 {
ret void, !dbg !8
}
@@ -16,11 +16,11 @@ define void @test() {
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 176994)", isOptimized: false, emissionKind: 0, file: !11, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 176994)", isOptimized: false, emissionKind: 0, file: !11, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3)
!2 = !DIFile(filename: "version", directory: "/usr/local/google/home/nlewycky")
!3 = !{}
!4 = !{!5}
-!5 = !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !10, scope: !6, type: !7, function: void ()* @test, variables: !3)
+!5 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !10, scope: !6, type: !7, variables: !3)
!6 = !DIFile(filename: "<stdin>", directory: ".")
!7 = !DISubroutineType(types: !{null})
!8 = !DILocation(line: 1, scope: !5)
diff --git a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
index fdf17e0b46df..a0cf92989b81 100644
--- a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
+++ b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
@@ -17,20 +17,20 @@ target triple = "i386-pc-linux-gnu"
%"struct.std::pair<std::_Rb_tree_iterator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,bool>" = type { %"struct.std::_Rb_tree_iterator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >", i8 }
%"struct.std::pair<void* const,void*>" = type { i8*, i8* }
-@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK16__pthread_attr_sPFPvS3_ES3_ = weak alias i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*)* @pthread_mutexattr_init ; <i32 (%struct.__sched_param*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%struct.__sched_param*, i32)* @pthread_mutexattr_settype ; <i32 (%struct.__sched_param*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*)* @pthread_mutexattr_destroy ; <i32 (%struct.__sched_param*)*> [#uses=0]
+@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*), i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
+@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32), i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
+@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*), i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_createPmPK16__pthread_attr_sPFPvS3_ES3_ = weak alias i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*), i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i32), i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*), i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%struct.pthread_mutex_t*, %struct.__sched_param*), i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*), i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32), i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
+@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*), i32 (%struct.__sched_param*)* @pthread_mutexattr_init ; <i32 (%struct.__sched_param*)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%struct.__sched_param*, i32), i32 (%struct.__sched_param*, i32)* @pthread_mutexattr_settype ; <i32 (%struct.__sched_param*, i32)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*), i32 (%struct.__sched_param*)* @pthread_mutexattr_destroy ; <i32 (%struct.__sched_param*)*> [#uses=0]
declare fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind readnone
diff --git a/test/Transforms/GVN/assume-equal.ll b/test/Transforms/GVN/assume-equal.ll
new file mode 100644
index 000000000000..f9304a8fc7c6
--- /dev/null
+++ b/test/Transforms/GVN/assume-equal.ll
@@ -0,0 +1,235 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+
+%struct.A = type { i32 (...)** }
+@_ZTV1A = available_externally unnamed_addr constant [4 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3fooEv to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A3barEv to i8*)], align 8
+@_ZTI1A = external constant i8*
+
+; Checks if indirect calls can be replaced with direct calls,
+; assuming that %vtable == @_ZTV1A (with alignment).
+; Checks const propagation across other BBs.
+; CHECK-LABEL: define void @_Z1gb(
+
+define void @_Z1gb(i1 zeroext %p) {
+entry:
+ %call = tail call noalias i8* @_Znwm(i64 8) #4
+ %0 = bitcast i8* %call to %struct.A*
+ tail call void @_ZN1AC1Ev(%struct.A* %0) #1
+ %1 = bitcast i8* %call to i8***
+ %vtable = load i8**, i8*** %1, align 8
+ %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2)
+ tail call void @llvm.assume(i1 %cmp.vtables)
+ br i1 %p, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)**
+ %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
+
+ ; CHECK: call i32 @_ZN1A3fooEv(
+ %call2 = tail call i32 %2(%struct.A* %0) #1
+
+ br label %if.end
+
+if.else: ; preds = %entry
+ %vfn47 = getelementptr inbounds i8*, i8** %vtable, i64 1
+ %vfn4 = bitcast i8** %vfn47 to i32 (%struct.A*)**
+
+ ; CHECK: call i32 @_ZN1A3barEv(
+ %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn4, align 8
+
+ %call5 = tail call i32 %3(%struct.A* %0) #1
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret void
+}
+
+; Check integration with invariant.group handling
+; CHECK-LABEL: define void @invariantGroupHandling(i1 zeroext %p) {
+define void @invariantGroupHandling(i1 zeroext %p) {
+entry:
+ %call = tail call noalias i8* @_Znwm(i64 8) #4
+ %0 = bitcast i8* %call to %struct.A*
+ tail call void @_ZN1AC1Ev(%struct.A* %0) #1
+ %1 = bitcast i8* %call to i8***
+ %vtable = load i8**, i8*** %1, align 8, !invariant.group !0
+ %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2)
+ tail call void @llvm.assume(i1 %cmp.vtables)
+ br i1 %p, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)**
+ %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
+
+; CHECK: call i32 @_ZN1A3fooEv(
+ %call2 = tail call i32 %2(%struct.A* %0) #1
+ %vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0
+ %vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)**
+ %call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8
+; FIXME: those loads could also be direct, but right now the invariant.group
+; analysis works only on a single block
+; CHECK-NOT: call i32 @_ZN1A3fooEv(
+ %callx = tail call i32 %call1(%struct.A* %0) #1
+
+ %vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0
+ %vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)**
+ %call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8
+; CHECK-NOT: call i32 @_ZN1A3fooEv(
+ %cally = tail call i32 %call4(%struct.A* %0) #1
+
+ %b = bitcast i8* %call to %struct.A**
+ %vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0
+ %vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)**
+ %vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8
+; CHECK-NOT: call i32 @_ZN1A3fooEv(
+ %unknown = tail call i32 %vfun(%struct.A* %0) #1
+
+ br label %if.end
+
+if.else: ; preds = %entry
+ %vfn47 = getelementptr inbounds i8*, i8** %vtable, i64 1
+ %vfn4 = bitcast i8** %vfn47 to i32 (%struct.A*)**
+
+ ; CHECK: call i32 @_ZN1A3barEv(
+ %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn4, align 8
+
+ %call5 = tail call i32 %3(%struct.A* %0) #1
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret void
+}
+
+
+; Checking const propagation in the same BB
+; CHECK-LABEL: define i32 @main()
+
+define i32 @main() {
+entry:
+ %call = tail call noalias i8* @_Znwm(i64 8)
+ %0 = bitcast i8* %call to %struct.A*
+ tail call void @_ZN1AC1Ev(%struct.A* %0)
+ %1 = bitcast i8* %call to i8***
+ %vtable = load i8**, i8*** %1, align 8
+ %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV1A, i64 0, i64 2)
+ tail call void @llvm.assume(i1 %cmp.vtables)
+ %vtable1.cast = bitcast i8** %vtable to i32 (%struct.A*)**
+
+ ; CHECK: call i32 @_ZN1A3fooEv(
+ %2 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable1.cast, align 8
+
+ %call2 = tail call i32 %2(%struct.A* %0)
+ ret i32 0
+}
+
+; This test checks const propagation with the fcmp instruction.
+; CHECK-LABEL: define float @_Z1gf(float %p)
+
+define float @_Z1gf(float %p) {
+entry:
+ %p.addr = alloca float, align 4
+ %f = alloca float, align 4
+ store float %p, float* %p.addr, align 4
+
+ store float 3.000000e+00, float* %f, align 4
+ %0 = load float, float* %p.addr, align 4
+ %1 = load float, float* %f, align 4
+ %cmp = fcmp oeq float %1, %0 ; note const on lhs
+ call void @llvm.assume(i1 %cmp)
+
+ ; CHECK: ret float 3.000000e+00
+ ret float %0
+}
+
+; CHECK-LABEL: define float @_Z1hf(float %p)
+
+define float @_Z1hf(float %p) {
+entry:
+ %p.addr = alloca float, align 4
+ store float %p, float* %p.addr, align 4
+
+ %0 = load float, float* %p.addr, align 4
+ %cmp = fcmp nnan ueq float %0, 3.000000e+00
+ call void @llvm.assume(i1 %cmp)
+
+ ; CHECK: ret float 3.000000e+00
+ ret float %0
+}
+
+; CHECK-LABEL: define float @_Z1if(float %p)
+define float @_Z1if(float %p) {
+entry:
+ %p.addr = alloca float, align 4
+ store float %p, float* %p.addr, align 4
+
+ %0 = load float, float* %p.addr, align 4
+ %cmp = fcmp ueq float %0, 3.000000e+00 ; no nnan flag - can't propagate
+ call void @llvm.assume(i1 %cmp)
+
+ ; CHECK-NOT: ret float 3.000000e+00
+ ret float %0
+}
+
+; This test checks if constant propagation works for multiple node edges
+; CHECK-LABEL: define i32 @_Z1ii(i32 %p)
+define i32 @_Z1ii(i32 %p) {
+entry:
+ %cmp = icmp eq i32 %p, 42
+ call void @llvm.assume(i1 %cmp)
+
+ ; CHECK: br i1 true, label %bb2, label %bb2
+ br i1 %cmp, label %bb2, label %bb2
+bb2:
+ call void @llvm.assume(i1 true)
+ ; CHECK: br i1 true, label %bb2, label %bb2
+ br i1 %cmp, label %bb2, label %bb2
+
+ ; CHECK: ret i32 42
+ ret i32 %p
+}
+
+; CHECK-LABEL: define i32 @_Z1ij(i32 %p)
+define i32 @_Z1ij(i32 %p) {
+entry:
+ %cmp = icmp eq i32 %p, 42
+ call void @llvm.assume(i1 %cmp)
+
+ ; CHECK: br i1 true, label %bb2, label %bb2
+ br i1 %cmp, label %bb2, label %bb2
+bb2:
+ ; CHECK-NOT: %cmp2 =
+ %cmp2 = icmp eq i32 %p, 42
+ ; CHECK-NOT: call void @llvm.assume(
+ call void @llvm.assume(i1 %cmp2)
+
+ ; CHECK: br i1 true, label %bb2, label %bb2
+ br i1 %cmp, label %bb2, label %bb2
+
+ ; CHECK: ret i32 42
+ ret i32 %p
+}
+
+; CHECK-LABEL: define i32 @_Z1ik(i32 %p)
+define i32 @_Z1ik(i32 %p) {
+entry:
+ %cmp = icmp eq i32 %p, 42
+ call void @llvm.assume(i1 %cmp)
+
+ ; CHECK: br i1 true, label %bb2, label %bb3
+ br i1 %cmp, label %bb2, label %bb3
+bb2:
+ ; CHECK-NOT: %cmp3 =
+ %cmp3 = icmp eq i32 %p, 43
+ ; CHECK: store i8 undef, i8* null
+ call void @llvm.assume(i1 %cmp3)
+ ret i32 15
+bb3:
+ ret i32 17
+}
+
+declare noalias i8* @_Znwm(i64)
+declare void @_ZN1AC1Ev(%struct.A*)
+declare void @llvm.assume(i1)
+declare i32 @_ZN1A3fooEv(%struct.A*)
+declare i32 @_ZN1A3barEv(%struct.A*)
+
+!0 = !{!"struct A"}
diff --git a/test/Transforms/GVN/crash-no-aa.ll b/test/Transforms/GVN/crash-no-aa.ll
index f076a8d81ace..0d09ecedc6ac 100644
--- a/test/Transforms/GVN/crash-no-aa.ll
+++ b/test/Transforms/GVN/crash-no-aa.ll
@@ -1,4 +1,4 @@
-; RUN: opt -no-aa -gvn -S < %s
+; RUN: opt -disable-basicaa -gvn -S < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-freebsd8.0"
diff --git a/test/Transforms/GVN/funclet.ll b/test/Transforms/GVN/funclet.ll
new file mode 100644
index 000000000000..2669256f0bdc
--- /dev/null
+++ b/test/Transforms/GVN/funclet.ll
@@ -0,0 +1,44 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+%eh.ThrowInfo = type { i32, i8*, i8*, i8* }
+%struct.A = type { i32* }
+
+@"_TI1?AUA@@" = external constant %eh.ThrowInfo
+
+define i8 @f() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %b = alloca i8
+ %c = alloca i8
+ store i8 42, i8* %b
+ store i8 13, i8* %c
+ invoke void @_CxxThrowException(i8* %b, %eh.ThrowInfo* nonnull @"_TI1?AUA@@")
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %catchpad = catchpad within %cs1 [i8* null, i32 64, i8* null]
+ store i8 5, i8* %b
+ catchret from %catchpad to label %try.cont
+
+try.cont: ; preds = %catch
+ %load_b = load i8, i8* %b
+ %load_c = load i8, i8* %c
+ %add = add i8 %load_b, %load_c
+ ret i8 %add
+
+unreachable: ; preds = %entry
+ unreachable
+}
+; CHECK-LABEL: define i8 @f(
+; CHECK: %[[load_b:.*]] = load i8, i8* %b
+; CHECK-NEXT: %[[load_c:.*]] = load i8, i8* %c
+; CHECK-NEXT: %[[add:.*]] = add i8 %[[load_b]], %[[load_c]]
+; CHECK-NEXT: ret i8 %[[add]]
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare x86_stdcallcc void @_CxxThrowException(i8*, %eh.ThrowInfo*)
diff --git a/test/Transforms/GVN/invariant-load.ll b/test/Transforms/GVN/invariant-load.ll
index 982da8cfe486..f74fd3392c18 100644
--- a/test/Transforms/GVN/invariant-load.ll
+++ b/test/Transforms/GVN/invariant-load.ll
@@ -114,6 +114,23 @@ entry:
ret i32 %res
}
+define i32 @test8(i1 %cnd, i32* %p) {
+; CHECK-LABEL: test8
+; CHECK: @bar
+; CHECK: load i32, i32* %p2, !invariant.load
+; CHECK: br label %merge
+entry:
+ %v1 = load i32, i32* %p, !invariant.load !0
+ br i1 %cnd, label %taken, label %merge
+taken:
+ %p2 = call i32* (...) @bar(i32* %p)
+ br label %merge
+merge:
+ %p3 = phi i32* [%p, %entry], [%p2, %taken]
+ %v2 = load i32, i32* %p3, !invariant.load !0
+ %res = sub i32 %v1, %v2
+ ret i32 %res
+}
!0 = !{ }
diff --git a/test/Transforms/GVN/invariant.group.ll b/test/Transforms/GVN/invariant.group.ll
new file mode 100644
index 000000000000..f703fda93f23
--- /dev/null
+++ b/test/Transforms/GVN/invariant.group.ll
@@ -0,0 +1,337 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+
+%struct.A = type { i32 (...)** }
+@_ZTV1A = available_externally unnamed_addr constant [3 x i8*] [i8* null, i8* bitcast (i8** @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1A3fooEv to i8*)], align 8
+@_ZTI1A = external constant i8*
+
+@unknownPtr = external global i8
+
+; CHECK-LABEL: define i8 @simple() {
+define i8 @simple() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ call void @foo(i8* %ptr)
+
+ %a = load i8, i8* %ptr, !invariant.group !0
+ %b = load i8, i8* %ptr, !invariant.group !0
+ %c = load i8, i8* %ptr, !invariant.group !0
+; CHECK: ret i8 42
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @optimizable1() {
+define i8 @optimizable1() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
+ %a = load i8, i8* %ptr, !invariant.group !0
+
+ call void @foo(i8* %ptr2); call to use %ptr2
+; CHECK: ret i8 42
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @optimizable2() {
+define i8 @optimizable2() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ call void @foo(i8* %ptr)
+
+ store i8 13, i8* %ptr ; can't use this store with invariant.group
+ %a = load i8, i8* %ptr
+ call void @bar(i8 %a) ; call to use %a
+
+ call void @foo(i8* %ptr)
+ %b = load i8, i8* %ptr, !invariant.group !0
+
+; CHECK: ret i8 42
+ ret i8 %b
+}
+
+; CHECK-LABEL: define i8 @unoptimizable1() {
+define i8 @unoptimizable1() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr
+ call void @foo(i8* %ptr)
+ %a = load i8, i8* %ptr, !invariant.group !0
+; CHECK: ret i8 %a
+ ret i8 %a
+}
+
+; CHECK-LABEL: define void @indirectLoads() {
+define void @indirectLoads() {
+entry:
+ %a = alloca %struct.A*, align 8
+ %0 = bitcast %struct.A** %a to i8*
+
+ %call = call i8* @getPointer(i8* null)
+ %1 = bitcast i8* %call to %struct.A*
+ call void @_ZN1AC1Ev(%struct.A* %1)
+ %2 = bitcast %struct.A* %1 to i8***
+
+; CHECK: %vtable = load {{.*}} !invariant.group
+ %vtable = load i8**, i8*** %2, align 8, !invariant.group !2
+ %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2)
+ call void @llvm.assume(i1 %cmp.vtables)
+
+ store %struct.A* %1, %struct.A** %a, align 8
+ %3 = load %struct.A*, %struct.A** %a, align 8
+ %4 = bitcast %struct.A* %3 to void (%struct.A*)***
+
+; CHECK: call void @_ZN1A3fooEv(
+ %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !2
+ %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0
+ %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8
+ call void %5(%struct.A* %3)
+ %6 = load %struct.A*, %struct.A** %a, align 8
+ %7 = bitcast %struct.A* %6 to void (%struct.A*)***
+
+; CHECK: call void @_ZN1A3fooEv(
+ %vtable2 = load void (%struct.A*)**, void (%struct.A*)*** %7, align 8, !invariant.group !2
+ %vfn3 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable2, i64 0
+ %8 = load void (%struct.A*)*, void (%struct.A*)** %vfn3, align 8
+
+ call void %8(%struct.A* %6)
+ %9 = load %struct.A*, %struct.A** %a, align 8
+ %10 = bitcast %struct.A* %9 to void (%struct.A*)***
+
+ %vtable4 = load void (%struct.A*)**, void (%struct.A*)*** %10, align 8, !invariant.group !2
+ %vfn5 = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable4, i64 0
+ %11 = load void (%struct.A*)*, void (%struct.A*)** %vfn5, align 8
+; CHECK: call void @_ZN1A3fooEv(
+ call void %11(%struct.A* %9)
+
+ %vtable5 = load i8**, i8*** %2, align 8, !invariant.group !2
+ %vfn6 = getelementptr inbounds i8*, i8** %vtable5, i64 0
+ %12 = bitcast i8** %vfn6 to void (%struct.A*)**
+ %13 = load void (%struct.A*)*, void (%struct.A*)** %12, align 8
+; CHECK: call void @_ZN1A3fooEv(
+ call void %13(%struct.A* %9)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @combiningBitCastWithLoad() {
+define void @combiningBitCastWithLoad() {
+entry:
+ %a = alloca %struct.A*, align 8
+ %0 = bitcast %struct.A** %a to i8*
+
+ %call = call i8* @getPointer(i8* null)
+ %1 = bitcast i8* %call to %struct.A*
+ call void @_ZN1AC1Ev(%struct.A* %1)
+ %2 = bitcast %struct.A* %1 to i8***
+
+; CHECK: %vtable = load {{.*}} !invariant.group
+ %vtable = load i8**, i8*** %2, align 8, !invariant.group !2
+ %cmp.vtables = icmp eq i8** %vtable, getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2)
+
+ store %struct.A* %1, %struct.A** %a, align 8
+; CHECK-NOT: !invariant.group
+ %3 = load %struct.A*, %struct.A** %a, align 8
+ %4 = bitcast %struct.A* %3 to void (%struct.A*)***
+
+ %vtable1 = load void (%struct.A*)**, void (%struct.A*)*** %4, align 8, !invariant.group !2
+ %vfn = getelementptr inbounds void (%struct.A*)*, void (%struct.A*)** %vtable1, i64 0
+ %5 = load void (%struct.A*)*, void (%struct.A*)** %vfn, align 8
+ call void %5(%struct.A* %3)
+
+ ret void
+}
+
+; CHECK-LABEL: define void @loadCombine() {
+define void @loadCombine() {
+enter:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr
+ call void @foo(i8* %ptr)
+; CHECK: %[[A:.*]] = load i8, i8* %ptr, !invariant.group
+ %a = load i8, i8* %ptr, !invariant.group !0
+; CHECK-NOT: load
+ %b = load i8, i8* %ptr, !invariant.group !1
+; CHECK: call void @bar(i8 %[[A]])
+ call void @bar(i8 %a)
+; CHECK: call void @bar(i8 %[[A]])
+ call void @bar(i8 %b)
+ ret void
+}
+
+; CHECK-LABEL: define void @loadCombine1() {
+define void @loadCombine1() {
+enter:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr
+ call void @foo(i8* %ptr)
+; CHECK: %[[D:.*]] = load i8, i8* %ptr, !invariant.group
+ %c = load i8, i8* %ptr
+; CHECK-NOT: load
+ %d = load i8, i8* %ptr, !invariant.group !1
+; CHECK: call void @bar(i8 %[[D]])
+ call void @bar(i8 %c)
+; CHECK: call void @bar(i8 %[[D]])
+ call void @bar(i8 %d)
+ ret void
+}
+
+; CHECK-LABEL: define void @loadCombine2() {
+define void @loadCombine2() {
+enter:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr
+ call void @foo(i8* %ptr)
+; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group
+ %e = load i8, i8* %ptr, !invariant.group !1
+; CHECK-NOT: load
+ %f = load i8, i8* %ptr
+; CHECK: call void @bar(i8 %[[E]])
+ call void @bar(i8 %e)
+; CHECK: call void @bar(i8 %[[E]])
+ call void @bar(i8 %f)
+ ret void
+}
+
+; CHECK-LABEL: define void @loadCombine3() {
+define void @loadCombine3() {
+enter:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr
+ call void @foo(i8* %ptr)
+; CHECK: %[[E:.*]] = load i8, i8* %ptr, !invariant.group ![[OneMD:[0-9]]]
+ %e = load i8, i8* %ptr, !invariant.group !1
+; CHECK-NOT: load
+ %f = load i8, i8* %ptr, !invariant.group !1
+; CHECK: call void @bar(i8 %[[E]])
+ call void @bar(i8 %e)
+; CHECK: call void @bar(i8 %[[E]])
+ call void @bar(i8 %f)
+ ret void
+}
+
+; CHECK-LABEL: define i8 @unoptimizable2() {
+define i8 @unoptimizable2() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr
+ call void @foo(i8* %ptr)
+ %a = load i8, i8* %ptr
+ call void @foo(i8* %ptr)
+ %b = load i8, i8* %ptr, !invariant.group !0
+
+; CHECK: ret i8 %a
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @unoptimizable3() {
+define i8 @unoptimizable3() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ %ptr2 = call i8* @getPointer(i8* %ptr)
+ %a = load i8, i8* %ptr2, !invariant.group !0
+
+; CHECK: ret i8 %a
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @unoptimizable4() {
+define i8 @unoptimizable4() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ %ptr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
+ %a = load i8, i8* %ptr2, !invariant.group !0
+
+; CHECK: ret i8 %a
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @volatile1() {
+define i8 @volatile1() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ call void @foo(i8* %ptr)
+ %a = load i8, i8* %ptr, !invariant.group !0
+ %b = load volatile i8, i8* %ptr
+; CHECK: call void @bar(i8 %b)
+ call void @bar(i8 %b)
+
+ %c = load volatile i8, i8* %ptr, !invariant.group !0
+; FIXME: we could change %c to 42, preserving volatile load
+; CHECK: call void @bar(i8 %c)
+ call void @bar(i8 %c)
+; CHECK: ret i8 42
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @volatile2() {
+define i8 @volatile2() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ call void @foo(i8* %ptr)
+ %a = load i8, i8* %ptr, !invariant.group !0
+ %b = load volatile i8, i8* %ptr
+; CHECK: call void @bar(i8 %b)
+ call void @bar(i8 %b)
+
+ %c = load volatile i8, i8* %ptr, !invariant.group !0
+; FIXME: we could change %c to 42, preserving volatile load
+; CHECK: call void @bar(i8 %c)
+ call void @bar(i8 %c)
+; CHECK: ret i8 42
+ ret i8 %a
+}
+
+; CHECK-LABEL: define i8 @fun() {
+define i8 @fun() {
+entry:
+ %ptr = alloca i8
+ store i8 42, i8* %ptr, !invariant.group !0
+ call void @foo(i8* %ptr)
+
+ %a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change
+; CHECK: call void @bar(i8 42)
+ call void @bar(i8 %a)
+
+ call void @foo(i8* %ptr)
+ %b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed
+; CHECK: call void @bar(i8 %b)
+ call void @bar(i8 %b)
+
+ %newPtr = call i8* @getPointer(i8* %ptr)
+ %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr
+; CHECK: call void @bar(i8 %c)
+ call void @bar(i8 %c)
+
+ %unknownValue = load i8, i8* @unknownPtr
+; FIXME: Can assume that %unknownValue == 42
+; CHECK: store i8 %unknownValue, i8* %ptr, !invariant.group !0
+ store i8 %unknownValue, i8* %ptr, !invariant.group !0
+
+ %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr)
+ %d = load i8, i8* %newPtr2, !invariant.group !0 ; Can't step through invariant.group.barrier to get value of %ptr
+; CHECK: ret i8 %d
+ ret i8 %d
+}
+
+declare void @foo(i8*)
+declare void @bar(i8)
+declare i8* @getPointer(i8*)
+declare void @_ZN1A3fooEv(%struct.A*)
+declare void @_ZN1AC1Ev(%struct.A*)
+declare i8* @llvm.invariant.group.barrier(i8*)
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1 %cmp.vtables) #0
+
+
+attributes #0 = { nounwind }
+; CHECK: ![[OneMD]] = !{!"other ptr"}
+!0 = !{!"magic ptr"}
+!1 = !{!"other ptr"}
+!2 = !{!"vtable_of_a"}
diff --git a/test/Transforms/GVN/load-pre-nonlocal.ll b/test/Transforms/GVN/load-pre-nonlocal.ll
index e9827a158ade..e0e886653076 100644
--- a/test/Transforms/GVN/load-pre-nonlocal.ll
+++ b/test/Transforms/GVN/load-pre-nonlocal.ll
@@ -61,7 +61,7 @@ for.end:
; CHECK-NOT: %1 = load i32, i32*
; CHECK: [[LSHR_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
-define i32 @overaligned_load(i32 %a, i32* nocapture %b) {
+define i32 @overaligned_load(i32 %a, i32* nocapture %b) !dbg !13 {
entry:
%cmp = icmp sgt i32 %a, 0, !dbg !14
br i1 %cmp, label %if.then, label %if.else, !dbg !14
@@ -99,7 +99,7 @@ if.end:
!10 = !{}
!11 = !DISubroutineType(types: !10)
!12 = !DIFile(filename: "test.cpp", directory: "/tmp")
-!13 = !DISubprogram(name: "test", scope: !12, file: !12, line: 99, type: !11, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32, i32*)* @overaligned_load, variables: !10)
+!13 = distinct !DISubprogram(name: "test", scope: !12, file: !12, line: 99, type: !11, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !10)
!14 = !DILocation(line: 100, column: 1, scope: !13)
!15 = !DILocation(line: 101, column: 1, scope: !13)
!16 = !DILocation(line: 102, column: 1, scope: !13)
diff --git a/test/Transforms/GVN/no_speculative_loads_with_asan.ll b/test/Transforms/GVN/no_speculative_loads_with_asan.ll
new file mode 100644
index 000000000000..2e790db1b2ad
--- /dev/null
+++ b/test/Transforms/GVN/no_speculative_loads_with_asan.ll
@@ -0,0 +1,57 @@
+; RUN: opt -O3 -S %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+declare noalias i8* @_Znam(i64) #1
+
+define i32 @TestNoAsan() {
+ %1 = tail call noalias i8* @_Znam(i64 2)
+ %2 = getelementptr inbounds i8, i8* %1, i64 1
+ store i8 0, i8* %2, align 1
+ store i8 0, i8* %1, align 1
+ %3 = bitcast i8* %1 to i16*
+ %4 = load i16, i16* %3, align 4
+ %5 = icmp eq i16 %4, 0
+ br i1 %5, label %11, label %6
+
+; <label>:6 ; preds = %0
+ %7 = getelementptr inbounds i8, i8* %1, i64 2
+ %8 = bitcast i8* %7 to i16*
+ %9 = load i16, i16* %8, align 2
+ %10 = sext i16 %9 to i32
+ br label %11
+
+; <label>:11 ; preds = %0, %6
+ %12 = phi i32 [ %10, %6 ], [ 0, %0 ]
+ ret i32 %12
+}
+
+; CHECK-LABEL: @TestNoAsan
+; CHECK: %[[LOAD:[^ ]+]] = load i32
+; CHECK: {{.*}} = ashr i32 %[[LOAD]]
+; CHECK-NOT: {{.*}} = phi
+
+define i32 @TestAsan() sanitize_address {
+ %1 = tail call noalias i8* @_Znam(i64 2)
+ %2 = getelementptr inbounds i8, i8* %1, i64 1
+ store i8 0, i8* %2, align 1
+ store i8 0, i8* %1, align 1
+ %3 = bitcast i8* %1 to i16*
+ %4 = load i16, i16* %3, align 4
+ %5 = icmp eq i16 %4, 0
+ br i1 %5, label %11, label %6
+
+; <label>:6 ; preds = %0
+ %7 = getelementptr inbounds i8, i8* %1, i64 2
+ %8 = bitcast i8* %7 to i16*
+ %9 = load i16, i16* %8, align 2
+ %10 = sext i16 %9 to i32
+ br label %11
+
+; <label>:11 ; preds = %0, %6
+ %12 = phi i32 [ %10, %6 ], [ 0, %0 ]
+ ret i32 %12
+}
+
+; CHECK-LABEL: @TestAsan
+; CHECK-NOT: %[[LOAD:[^ ]+]] = load i32
+; CHECK: {{.*}} = phi
+
diff --git a/test/Transforms/GVN/phi-translate.ll b/test/Transforms/GVN/phi-translate.ll
index 9e37b882f222..67036ab9746c 100644
--- a/test/Transforms/GVN/phi-translate.ll
+++ b/test/Transforms/GVN/phi-translate.ll
@@ -18,7 +18,7 @@ target datalayout = "e-p:64:64:64"
; CHECK-DAG: [[N_LOC]] = !DILocation(line: 47, column: 1, scope: !{{.*}})
@G = external global [100 x i32]
-define i32 @foo(i32 %x, i32 %z) {
+define i32 @foo(i32 %x, i32 %z) !dbg !6 {
entry:
%tobool = icmp eq i32 %z, 0, !dbg !7
br i1 %tobool, label %end, label %then, !dbg !7
@@ -44,7 +44,7 @@ end:
!3 = !{}
!4 = !DISubroutineType(types: !3)
!5 = !DIFile(filename: "a.cc", directory: "/tmp")
-!6 = !DISubprogram(name: "foo", scope: !5, file: !5, line: 42, type: !4, isLocal: false, isDefinition: true, scopeLine: 43, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32, i32)* @foo, variables: !3)
+!6 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 42, type: !4, isLocal: false, isDefinition: true, scopeLine: 43, flags: DIFlagPrototyped, isOptimized: false, variables: !3)
!7 = !DILocation(line: 43, column: 1, scope: !6)
!8 = !DILocation(line: 44, column: 1, scope: !6)
!9 = !DILocation(line: 45, column: 1, scope: !6)
diff --git a/test/Transforms/GVN/pr14166.ll b/test/Transforms/GVN/pr14166.ll
index eafe418dbdc6..ec1b1717f067 100644
--- a/test/Transforms/GVN/pr14166.ll
+++ b/test/Transforms/GVN/pr14166.ll
@@ -1,4 +1,4 @@
-; RUN: opt -gvn -S < %s | FileCheck %s
+; RUN: opt -disable-basicaa -gvn -S < %s | FileCheck %s
target datalayout = "e-p:32:32:32"
target triple = "i386-pc-linux-gnu"
define <2 x i32> @test1() {
diff --git a/test/Transforms/GVN/pr24426.ll b/test/Transforms/GVN/pr24426.ll
new file mode 100644
index 000000000000..76b190f8fc22
--- /dev/null
+++ b/test/Transforms/GVN/pr24426.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -memcpyopt -mldst-motion -gvn -S | FileCheck %s
+
+declare void @check(i8)
+
+declare void @write(i8* %res)
+
+define void @test1() {
+ %1 = alloca [10 x i8]
+ %2 = bitcast [10 x i8]* %1 to i8*
+ call void @write(i8* %2)
+ %3 = load i8, i8* %2
+
+; CHECK-NOT: undef
+ call void @check(i8 %3)
+
+ ret void
+}
+
diff --git a/test/Transforms/GVN/pr25440.ll b/test/Transforms/GVN/pr25440.ll
new file mode 100644
index 000000000000..14e2c30f04b2
--- /dev/null
+++ b/test/Transforms/GVN/pr25440.ll
@@ -0,0 +1,108 @@
+;RUN: opt -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
+target triple = "thumbv7--linux-gnueabi"
+
+%struct.a = type { i16, i16, [1 x %union.a] }
+%union.a = type { i32 }
+
+@length = external global [0 x i32], align 4
+
+; Function Attrs: nounwind
+define fastcc void @foo(%struct.a* nocapture readonly %x) {
+;CHECK-LABEL: foo
+entry:
+ br label %bb0
+
+bb0: ; preds = %land.lhs.true, %entry
+;CHECK: bb0:
+ %x.tr = phi %struct.a* [ %x, %entry ], [ null, %land.lhs.true ]
+ %code1 = getelementptr inbounds %struct.a, %struct.a* %x.tr, i32 0, i32 0
+ %0 = load i16, i16* %code1, align 4
+; CHECK: load i32, i32*
+ %conv = zext i16 %0 to i32
+ switch i32 %conv, label %if.end.50 [
+ i32 43, label %cleanup
+ i32 52, label %if.then.5
+ ]
+
+if.then.5: ; preds = %bb0
+ br i1 undef, label %land.lhs.true, label %if.then.26
+
+land.lhs.true: ; preds = %if.then.5
+ br i1 undef, label %cleanup, label %bb0
+
+if.then.26: ; preds = %if.then.5
+ %x.tr.lcssa163 = phi %struct.a* [ %x.tr, %if.then.5 ]
+ br i1 undef, label %cond.end, label %cond.false
+
+cond.false: ; preds = %if.then.26
+; CHECK: cond.false:
+; CHECK-NOT: load
+ %mode = getelementptr inbounds %struct.a, %struct.a* %x.tr.lcssa163, i32 0, i32 1
+ %bf.load = load i16, i16* %mode, align 2
+ %bf.shl = shl i16 %bf.load, 8
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %if.then.26
+ br i1 undef, label %if.then.44, label %cleanup
+
+if.then.44: ; preds = %cond.end
+ unreachable
+
+if.end.50: ; preds = %bb0
+;CHECK: if.end.50:
+ %conv.lcssa = phi i32 [ %conv, %bb0 ]
+ %arrayidx52 = getelementptr inbounds [0 x i32], [0 x i32]* @length, i32 0, i32 %conv.lcssa
+ %1 = load i32, i32* %arrayidx52, align 4
+ br i1 undef, label %for.body.57, label %cleanup
+
+for.body.57: ; preds = %if.end.50
+ %i.2157 = add nsw i32 %1, -1
+ unreachable
+
+cleanup: ; preds = %if.end.50, %cond.end, %land.lhs.true, %bb0
+ ret void
+}
+
+@yy_c_buf_p = external unnamed_addr global i8*, align 4
+@dfg_text = external global i8*, align 4
+
+define void @dfg_lex() {
+;CHECK-LABEL: dfg_lex
+entry:
+ br label %while.bodythread-pre-split
+
+while.bodythread-pre-split: ; preds = %while.end, %while.end, %entry
+ br i1 undef, label %if.then.14, label %if.end.15
+
+if.then.14: ; preds = %while.end, %while.bodythread-pre-split
+ %v1 = load i32, i32* bitcast (i8** @dfg_text to i32*), align 4
+ %sub.ptr.sub = sub i32 undef, %v1
+ br label %if.end.15
+
+if.end.15: ; preds = %if.then.14, %while.bodythread-pre-split
+ %v2 = load i8*, i8** @yy_c_buf_p, align 4
+ br label %while.cond.16
+
+while.cond.16: ; preds = %while.cond.16, %if.end.15
+ br i1 undef, label %while.cond.16, label %while.end
+
+while.end: ; preds = %while.cond.16
+ %add.ptr = getelementptr inbounds i8, i8* %v2, i32 undef
+ store i8* %add.ptr, i8** @dfg_text, align 4
+ %sub.ptr.rhs.cast25 = ptrtoint i8* %add.ptr to i32
+ %sub.ptr.sub26 = sub i32 0, %sub.ptr.rhs.cast25
+ switch i32 undef, label %sw.default [
+ i32 65, label %while.bodythread-pre-split
+ i32 3, label %return
+ i32 57, label %while.bodythread-pre-split
+ i32 60, label %if.then.14
+ ]
+
+sw.default: ; preds = %while.end
+ unreachable
+
+return: ; preds = %while.end
+ ret void
+}
diff --git a/test/Transforms/GVN/pre-gep-load.ll b/test/Transforms/GVN/pre-gep-load.ll
index 291af359a7a1..a46dc22ade89 100644
--- a/test/Transforms/GVN/pre-gep-load.ll
+++ b/test/Transforms/GVN/pre-gep-load.ll
@@ -47,3 +47,34 @@ return: ; preds = %sw.default, %sw.bb2
%retval.0 = phi double [ 0.000000e+00, %sw.default ], [ %sub6, %sw.bb2 ], [ %sub, %if.then ]
ret double %retval.0
}
+
+; The load causes the GEP's operands to be PREd earlier than normal. The
+; resulting sext ends up in pre.dest and in the GVN system before that BB is
+; actually processed. Make sure we can deal with the situation.
+
+define void @test_shortcut_safe(i1 %tst, i32 %p1, i32* %a) {
+; CHECK-LABEL: define void @test_shortcut_safe
+; CHECK: [[SEXT1:%.*]] = sext i32 %p1 to i64
+; CHECK: [[PHI1:%.*]] = phi i64 [ [[SEXT1]], {{%.*}} ], [ [[PHI2:%.*]], {{%.*}} ]
+; CHECK: [[SEXT2:%.*]] = sext i32 %p1 to i64
+; CHECK: [[PHI2]] = phi i64 [ [[SEXT2]], {{.*}} ], [ [[PHI1]], {{%.*}} ]
+; CHECK: getelementptr inbounds i32, i32* %a, i64 [[PHI2]]
+
+ br i1 %tst, label %sext1, label %pre.dest
+
+pre.dest:
+ br label %sext.use
+
+sext1:
+ %idxprom = sext i32 %p1 to i64
+ br label %sext.use
+
+sext.use:
+ %idxprom2 = sext i32 %p1 to i64
+ %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %idxprom2
+ %val = load i32, i32* %arrayidx3, align 4
+ tail call void (i32) @g(i32 %val)
+ br label %pre.dest
+}
+
+declare void @g(i32)
diff --git a/test/Transforms/GVN/pre-load.ll b/test/Transforms/GVN/pre-load.ll
index 24221d540f22..685df24f62b6 100644
--- a/test/Transforms/GVN/pre-load.ll
+++ b/test/Transforms/GVN/pre-load.ll
@@ -389,3 +389,44 @@ block5:
; CHECK: block4:
; CHECK-NEXT: phi i32
}
+
+declare void @f()
+declare void @g(i32)
+declare i32 @__CxxFrameHandler3(...)
+
+; Test that loads aren't PRE'd into EH pads.
+define void @test12(i32* %p) personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: @test12(
+block1:
+ invoke void @f()
+ to label %block2 unwind label %catch.dispatch
+
+block2:
+ invoke void @f()
+ to label %block3 unwind label %cleanup
+
+block3:
+ ret void
+
+catch.dispatch:
+ %cs1 = catchswitch within none [label %catch] unwind label %cleanup2
+
+catch:
+ %c = catchpad within %cs1 []
+ catchret from %c to label %block2
+
+cleanup:
+ %c1 = cleanuppad within none []
+ store i32 0, i32* %p
+ cleanupret from %c1 unwind label %cleanup2
+
+; CHECK: cleanup2:
+; CHECK-NOT: phi
+; CHECK-NEXT: %c2 = cleanuppad within none []
+; CHECK-NEXT: %NOTPRE = load i32, i32* %p
+cleanup2:
+ %c2 = cleanuppad within none []
+ %NOTPRE = load i32, i32* %p
+ call void @g(i32 %NOTPRE)
+ cleanupret from %c2 unwind to caller
+}
diff --git a/test/Transforms/GVN/range.ll b/test/Transforms/GVN/range.ll
index 297c6aac88dd..39acc0c35157 100644
--- a/test/Transforms/GVN/range.ll
+++ b/test/Transforms/GVN/range.ll
@@ -1,7 +1,7 @@
; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
define i32 @test1(i32* %p) {
-; CHECK: @test1(i32* %p)
+; CHECK-LABEL: @test1(i32* %p)
; CHECK: %a = load i32, i32* %p, !range !0
; CHECK: %c = add i32 %a, %a
%a = load i32, i32* %p, !range !0
@@ -11,7 +11,7 @@ define i32 @test1(i32* %p) {
}
define i32 @test2(i32* %p) {
-; CHECK: @test2(i32* %p)
+; CHECK-LABEL: @test2(i32* %p)
; CHECK: %a = load i32, i32* %p
; CHECK-NOT: range
; CHECK: %c = add i32 %a, %a
@@ -22,7 +22,7 @@ define i32 @test2(i32* %p) {
}
define i32 @test3(i32* %p) {
-; CHECK: @test3(i32* %p)
+; CHECK-LABEL: @test3(i32* %p)
; CHECK: %a = load i32, i32* %p, !range ![[DISJOINT_RANGE:[0-9]+]]
; CHECK: %c = add i32 %a, %a
%a = load i32, i32* %p, !range !0
@@ -32,7 +32,7 @@ define i32 @test3(i32* %p) {
}
define i32 @test4(i32* %p) {
-; CHECK: @test4(i32* %p)
+; CHECK-LABEL: @test4(i32* %p)
; CHECK: %a = load i32, i32* %p, !range ![[MERGED_RANGE:[0-9]+]]
; CHECK: %c = add i32 %a, %a
%a = load i32, i32* %p, !range !0
@@ -42,7 +42,7 @@ define i32 @test4(i32* %p) {
}
define i32 @test5(i32* %p) {
-; CHECK: @test5(i32* %p)
+; CHECK-LABEL: @test5(i32* %p)
; CHECK: %a = load i32, i32* %p, !range ![[MERGED_SIGNED_RANGE:[0-9]+]]
; CHECK: %c = add i32 %a, %a
%a = load i32, i32* %p, !range !3
@@ -52,7 +52,7 @@ define i32 @test5(i32* %p) {
}
define i32 @test6(i32* %p) {
-; CHECK: @test6(i32* %p)
+; CHECK-LABEL: @test6(i32* %p)
; CHECK: %a = load i32, i32* %p, !range ![[MERGED_TEST6:[0-9]+]]
; CHECK: %c = add i32 %a, %a
%a = load i32, i32* %p, !range !5
@@ -62,7 +62,7 @@ define i32 @test6(i32* %p) {
}
define i32 @test7(i32* %p) {
-; CHECK: @test7(i32* %p)
+; CHECK-LABEL: @test7(i32* %p)
; CHECK: %a = load i32, i32* %p, !range ![[MERGED_TEST7:[0-9]+]]
; CHECK: %c = add i32 %a, %a
%a = load i32, i32* %p, !range !7
@@ -72,7 +72,7 @@ define i32 @test7(i32* %p) {
}
define i32 @test8(i32* %p) {
-; CHECK: @test8(i32* %p)
+; CHECK-LABEL: @test8(i32* %p)
; CHECK: %a = load i32, i32* %p
; CHECK-NOT: range
; CHECK: %c = add i32 %a, %a
@@ -84,17 +84,17 @@ define i32 @test8(i32* %p) {
; CHECK: ![[DISJOINT_RANGE]] = !{i32 0, i32 2, i32 3, i32 5}
; CHECK: ![[MERGED_RANGE]] = !{i32 0, i32 5}
-; CHECK: ![[MERGED_SIGNED_RANGE]] = !{i32 -3, i32 -2, i32 1, i32 2}
+; CHECK: ![[MERGED_SIGNED_RANGE]] = !{i32 -5, i32 -2, i32 1, i32 5}
; CHECK: ![[MERGED_TEST6]] = !{i32 10, i32 1}
; CHECK: ![[MERGED_TEST7]] = !{i32 3, i32 4, i32 5, i32 2}
!0 = !{i32 0, i32 2}
!1 = !{i32 3, i32 5}
!2 = !{i32 2, i32 5}
-!3 = !{i32 -3, i32 -2}
-!4 = !{i32 1, i32 2}
+!3 = !{i32 -5, i32 -2}
+!4 = !{i32 1, i32 5}
!5 = !{i32 10, i32 1}
-!6 = !{i32 12, i32 13}
+!6 = !{i32 12, i32 16}
!7 = !{i32 1, i32 2, i32 3, i32 4}
!8 = !{i32 5, i32 1}
!9 = !{i32 1, i32 5}
diff --git a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
index 584f0bf467fa..c62ec10df790 100644
--- a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
+++ b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
@@ -5,14 +5,14 @@
@A = global i32 0
; CHECK: @A = global i32 0
-@D = internal alias i32* @A
+@D = internal alias i32, i32* @A
; DEAD-NOT: @D
-@L1 = alias i32* @A
-; CHECK: @L1 = alias i32* @A
+@L1 = alias i32, i32* @A
+; CHECK: @L1 = alias i32, i32* @A
-@L2 = internal alias i32* @L1
-; CHECK: @L2 = internal alias i32* @L1
+@L2 = internal alias i32, i32* @L1
+; CHECK: @L2 = internal alias i32, i32* @L1
-@L3 = alias i32* @L2
-; CHECK: @L3 = alias i32* @L2
+@L3 = alias i32, i32* @L2
+; CHECK: @L3 = alias i32, i32* @L2
diff --git a/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll b/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
index 5fb4444c6ba8..17474888d79b 100644
--- a/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
+++ b/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
@@ -1,4 +1,4 @@
; RUN: opt < %s -globaldce
-@A = internal alias void ()* @F
+@A = internal alias void (), void ()* @F
define internal void @F() { ret void }
diff --git a/test/Transforms/GlobalDCE/pr20981.ll b/test/Transforms/GlobalDCE/pr20981.ll
index 0eaa6b899091..c3e06699da77 100644
--- a/test/Transforms/GlobalDCE/pr20981.ll
+++ b/test/Transforms/GlobalDCE/pr20981.ll
@@ -3,8 +3,8 @@
$c1 = comdat any
; CHECK: $c1 = comdat any
-@a1 = linkonce_odr alias void ()* @f1
-; CHECK: @a1 = linkonce_odr alias void ()* @f1
+@a1 = linkonce_odr alias void (), void ()* @f1
+; CHECK: @a1 = linkonce_odr alias void (), void ()* @f1
define linkonce_odr void @f1() comdat($c1) {
ret void
diff --git a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
index a3e90045d64b..e6337adefa13 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
@@ -2,7 +2,7 @@
@g = global i32 0
-@a = alias bitcast (i32* @g to i8*)
+@a = alias i8, bitcast (i32* @g to i8*)
define void @f() {
%tmp = load i8, i8* @a
diff --git a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
index 6933d4a8d96c..42c243d9d7c0 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
@@ -6,14 +6,14 @@ define internal void @f() {
ret void
}
-@a = alias void ()* @f
+@a = alias void (), void ()* @f
define void @g() {
call void() @a()
ret void
}
-@b = internal alias void ()* @g
+@b = internal alias void (), void ()* @g
; CHECK-NOT: @b
define void @h() {
diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
index a8d618ae522d..f74f2081dc20 100644
--- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
@@ -56,10 +56,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.gv = !{!0}
!0 = !DIGlobalVariable(name: "Stop", line: 2, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !2, variable: i32* @Stop)
-!1 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21)
+!1 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21)
!2 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!3 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 4, arg: 0, scope: !4, file: !1, type: !2)
-!4 = !DISubprogram(name: "foo", linkageName: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1, type: !5)
+!3 = !DILocalVariable(name: "i", line: 4, arg: 1, scope: !4, file: !1, type: !2)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1, type: !5)
!5 = !DISubroutineType(types: !6)
!6 = !{!2, !2}
!7 = !DILocation(line: 5, scope: !8)
@@ -70,7 +70,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!12 = !DILocation(line: 11, scope: !8)
!13 = !DILocation(line: 14, scope: !14)
!14 = distinct !DILexicalBlock(line: 0, column: 0, file: !20, scope: !15)
-!15 = !DISubprogram(name: "bar", linkageName: "bar", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1, type: !16)
+!15 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1, type: !16)
!16 = !DISubroutineType(types: !17)
!17 = !{!2}
!18 = !DILocation(line: 15, scope: !14)
diff --git a/test/Transforms/GlobalOpt/alias-resolve.ll b/test/Transforms/GlobalOpt/alias-resolve.ll
index 090d78455226..46b90ec29b9d 100644
--- a/test/Transforms/GlobalOpt/alias-resolve.ll
+++ b/test/Transforms/GlobalOpt/alias-resolve.ll
@@ -1,20 +1,20 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
-@foo1 = alias void ()* @foo2
-; CHECK: @foo1 = alias void ()* @bar2
+@foo1 = alias void (), void ()* @foo2
+; CHECK: @foo1 = alias void (), void ()* @bar2
-@foo2 = alias void()* @bar1
-; CHECK: @foo2 = alias void ()* @bar2
+@foo2 = alias void(), void()* @bar1
+; CHECK: @foo2 = alias void (), void ()* @bar2
-@bar1 = alias void ()* @bar2
-; CHECK: @bar1 = alias void ()* @bar2
+@bar1 = alias void (), void ()* @bar2
+; CHECK: @bar1 = alias void (), void ()* @bar2
-@weak1 = weak alias void ()* @bar2
-; CHECK: @weak1 = weak alias void ()* @bar2
+@weak1 = weak alias void (), void ()* @bar2
+; CHECK: @weak1 = weak alias void (), void ()* @bar2
@bar4 = private unnamed_addr constant [2 x i8*] zeroinitializer
-@foo4 = linkonce_odr unnamed_addr alias getelementptr inbounds ([2 x i8*], [2 x i8*]* @bar4, i32 0, i32 1)
-; CHECK: @foo4 = linkonce_odr unnamed_addr alias getelementptr inbounds ([2 x i8*], [2 x i8*]* @bar4, i32 0, i32 1)
+@foo4 = weak_odr unnamed_addr alias i8*, getelementptr inbounds ([2 x i8*], [2 x i8*]* @bar4, i32 0, i32 1)
+; CHECK: @foo4 = weak_odr unnamed_addr alias i8*, getelementptr inbounds ([2 x i8*], [2 x i8*]* @bar4, i32 0, i32 1)
define void @bar2() {
ret void
@@ -37,7 +37,7 @@ entry:
ret void
}
-@foo3 = alias void ()* @bar3
+@foo3 = alias void (), void ()* @bar3
; CHECK-NOT: bar3
define internal void @bar3() {
diff --git a/test/Transforms/GlobalOpt/alias-used-address-space.ll b/test/Transforms/GlobalOpt/alias-used-address-space.ll
index 62e74ba2ab48..367f375ec900 100644
--- a/test/Transforms/GlobalOpt/alias-used-address-space.ll
+++ b/test/Transforms/GlobalOpt/alias-used-address-space.ll
@@ -7,7 +7,7 @@ target datalayout = "p:32:32:32-p1:16:16:16"
@i = internal addrspace(1) global i8 42
; CHECK: @ia = internal addrspace(1) global i8 42
-@ia = internal alias i8 addrspace(1)* @i
+@ia = internal alias i8, i8 addrspace(1)* @i
@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ca to i8*)], section "llvm.metadata"
; CHECK-DAG: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ca to i8*)], section "llvm.metadata"
@@ -18,8 +18,8 @@ target datalayout = "p:32:32:32-p1:16:16:16"
@sameAsUsed = global [1 x i8*] [i8* addrspacecast(i8 addrspace(1)* @ca to i8*)]
; CHECK-DAG: @sameAsUsed = global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @c to i8*)]
-@ca = internal alias i8 addrspace(1)* @c
-; CHECK: @ca = internal alias i8 addrspace(1)* @c
+@ca = internal alias i8, i8 addrspace(1)* @c
+; CHECK: @ca = internal alias i8, i8 addrspace(1)* @c
define i8 addrspace(1)* @h() {
ret i8 addrspace(1)* @ca
diff --git a/test/Transforms/GlobalOpt/alias-used-section.ll b/test/Transforms/GlobalOpt/alias-used-section.ll
index 4dab2f5a02f5..a3657dfd16bc 100644
--- a/test/Transforms/GlobalOpt/alias-used-section.ll
+++ b/test/Transforms/GlobalOpt/alias-used-section.ll
@@ -1,7 +1,7 @@
; RUN: opt -S -globalopt < %s | FileCheck %s
@_Z17in_custom_section = internal global i8 42, section "CUSTOM"
-@in_custom_section = internal dllexport alias i8* @_Z17in_custom_section
+@in_custom_section = internal dllexport alias i8, i8* @_Z17in_custom_section
; CHECK: @in_custom_section = internal dllexport global i8 42, section "CUSTOM"
diff --git a/test/Transforms/GlobalOpt/alias-used.ll b/test/Transforms/GlobalOpt/alias-used.ll
index 21f06b7be5ff..9ced3974ee87 100644
--- a/test/Transforms/GlobalOpt/alias-used.ll
+++ b/test/Transforms/GlobalOpt/alias-used.ll
@@ -4,10 +4,10 @@
@i = internal global i8 42
; CHECK: @ia = internal global i8 42
-@ia = internal alias i8* @i
+@ia = internal alias i8, i8* @i
@llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
-; CHECK-DAG: @llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
+; CHECK-DAG: @llvm.used = appending global [3 x i8*] [i8* @ca, i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*)], section "llvm.metadata"
@llvm.compiler.used = appending global [4 x i8*] [i8* bitcast (void ()* @fa3 to i8*), i8* bitcast (void ()* @fa to i8*), i8* @ia, i8* @i], section "llvm.metadata"
; CHECK-DAG: @llvm.compiler.used = appending global [2 x i8*] [i8* bitcast (void ()* @fa3 to i8*), i8* @ia], section "llvm.metadata"
@@ -18,17 +18,17 @@
@other = global i32* bitcast (void ()* @fa to i32*)
; CHECK-DAG: @other = global i32* bitcast (void ()* @f to i32*)
-@fa = internal alias void ()* @f
-; CHECK: @fa = internal alias void ()* @f
+@fa = internal alias void (), void ()* @f
+; CHECK: @fa = internal alias void (), void ()* @f
-@fa2 = internal alias void ()* @f
+@fa2 = internal alias void (), void ()* @f
; CHECK-NOT: @fa2
-@fa3 = internal alias void ()* @f
+@fa3 = internal alias void (), void ()* @f
; CHECK: @fa3
-@ca = internal alias i8* @c
-; CHECK: @ca = internal alias i8* @c
+@ca = internal alias i8, i8* @c
+; CHECK: @ca = internal alias i8, i8* @c
define void @f() {
ret void
diff --git a/test/Transforms/GlobalOpt/assume.ll b/test/Transforms/GlobalOpt/assume.ll
new file mode 100644
index 000000000000..3f3157a38fbb
--- /dev/null
+++ b/test/Transforms/GlobalOpt/assume.ll
@@ -0,0 +1,21 @@
+; RUN: opt -S -globalopt < %s | FileCheck %s
+
+; CHECK: @tmp = global i32 42
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+@tmp = global i32 0
+
+define i32 @TheAnswerToLifeTheUniverseAndEverything() {
+ ret i32 42
+}
+
+define void @_GLOBAL__I_a() {
+enter:
+ %tmp1 = call i32 @TheAnswerToLifeTheUniverseAndEverything()
+ store i32 %tmp1, i32* @tmp
+ %cmp = icmp eq i32 %tmp1, 42
+ call void @llvm.assume(i1 %cmp)
+ ret void
+}
+
+declare void @llvm.assume(i1)
diff --git a/test/Transforms/GlobalOpt/available_externally_global_ctors.ll b/test/Transforms/GlobalOpt/available_externally_global_ctors.ll
new file mode 100644
index 000000000000..7092a5ae2226
--- /dev/null
+++ b/test/Transforms/GlobalOpt/available_externally_global_ctors.ll
@@ -0,0 +1,22 @@
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+; RUN: opt -S -globalopt < %s | FileCheck %s
+
+; Verify that the initialization of the available_externally global is not eliminated
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo_static_init, i8* null }]
+
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo_static_init, i8* null }]
+@foo_external = available_externally global void ()* null
+
+define internal void @foo_static_init() {
+entry:
+ store void ()* @foo_impl, void ()** @foo_external
+ ret void
+}
+
+define internal void @foo_impl() {
+entry:
+ ret void
+}
+
diff --git a/test/Transforms/GlobalOpt/deadglobal.ll b/test/Transforms/GlobalOpt/deadglobal.ll
index 9563a23b2c29..f5eed44cbb6e 100644
--- a/test/Transforms/GlobalOpt/deadglobal.ll
+++ b/test/Transforms/GlobalOpt/deadglobal.ll
@@ -1,11 +1,14 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
@G1 = internal global i32 123 ; <i32*> [#uses=1]
+@A1 = internal alias i32, i32* @G1
; CHECK-NOT: @G1
; CHECK: @G2
; CHECK-NOT: @G3
+; CHECK-NOT: @A1
+
define void @foo1() {
; CHECK: define void @foo
; CHECK-NEXT: ret
diff --git a/test/Transforms/GlobalOpt/externally-initialized-aggregate.ll b/test/Transforms/GlobalOpt/externally-initialized-aggregate.ll
new file mode 100644
index 000000000000..b446d24f1fd2
--- /dev/null
+++ b/test/Transforms/GlobalOpt/externally-initialized-aggregate.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -S -globalopt | FileCheck %s
+
+; This global is externally_initialized, so if we split it into scalars we
+; should keep that flag set on all of the new globals. This will prevent the
+; store to @a[0] from being constant propagated to the load in @foo, but will not
+; prevent @a[1] from being removed since it is dead.
+; CHECK: @a.0 = internal unnamed_addr externally_initialized global i32 undef
+; CHECK-NOT: @a.1
+@a = internal externally_initialized global [2 x i32] undef, align 4
+; This is the same, but a struct rather than an array.
+; CHECK: @b.0 = internal unnamed_addr externally_initialized global i32 undef
+; CHECK-NOT: @b.1
+@b = internal externally_initialized global {i32, i32} undef, align 4
+
+define i32 @foo() {
+; CHECK-LABEL: define i32 @foo
+entry:
+; This load uses the split global, but cannot be constant-propagated away.
+; CHECK: %0 = load i32, i32* @a.0
+ %0 = load i32, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i32 0, i32 0), align 4
+ ret i32 %0
+}
+
+define i32 @bar() {
+; CHECK-LABEL: define i32 @bar
+entry:
+; This load uses the split global, but cannot be constant-propagated away.
+; CHECK: %0 = load i32, i32* @b.0
+ %0 = load i32, i32* getelementptr inbounds ({i32, i32}, {i32, i32}* @b, i32 0, i32 0), align 4
+ ret i32 %0
+}
+
+define void @init() {
+; CHECK-LABEL: define void @init
+entry:
+; This store uses the split global, but cannot be constant-propagated away.
+; CHECK: store i32 1, i32* @a.0
+ store i32 1, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i32 0, i32 0), align 4
+; This store can be removed, because the second element of @a is never read.
+; CHECK-NOT: store i32 2, i32* @a.1
+ store i32 2, i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i32 0, i32 1), align 4
+
+; This store uses the split global, but cannot be constant-propagated away.
+; CHECK: store i32 3, i32* @b.0
+ store i32 3, i32* getelementptr inbounds ({i32, i32}, {i32, i32}* @b, i32 0, i32 0), align 4
+; This store can be removed, because the second element of @b is never read.
+; CHECK-NOT: store i32 4, i32* @b.1
+ store i32 4, i32* getelementptr inbounds ({i32, i32}, {i32, i32}* @b, i32 0, i32 1), align 4
+ ret void
+}
diff --git a/test/Transforms/GlobalOpt/externally-initialized.ll b/test/Transforms/GlobalOpt/externally-initialized.ll
new file mode 100644
index 000000000000..c01ba10f49c9
--- /dev/null
+++ b/test/Transforms/GlobalOpt/externally-initialized.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -S -globalopt | FileCheck %s
+
+; This global is externally_initialized, which may modify the value between
+; its static initializer and any code in this module being run, so the only
+; write to it cannot be merged into the static initialiser.
+; CHECK: @a = internal unnamed_addr externally_initialized global i32 undef
+@a = internal externally_initialized global i32 undef
+
+; This global is stored to by the external initialization, so cannot be
+; constant-propagated and removed, despite the fact that there are no writes
+; to it.
+; CHECK: @b = internal unnamed_addr externally_initialized global i32 undef
+@b = internal externally_initialized global i32 undef
+
+
+define void @foo() {
+; CHECK-LABEL: foo
+entry:
+; CHECK: store i32 42, i32* @a
+ store i32 42, i32* @a
+ ret void
+}
+define i32 @bar() {
+; CHECK-LABEL: bar
+entry:
+; CHECK: %val = load i32, i32* @a
+ %val = load i32, i32* @a
+ ret i32 %val
+}
+
+define i32 @baz() {
+; CHECK-LABEL: baz
+entry:
+; CHECK: %val = load i32, i32* @b
+ %val = load i32, i32* @b
+ ret i32 %val
+}
diff --git a/test/Transforms/GlobalOpt/global-demotion.ll b/test/Transforms/GlobalOpt/global-demotion.ll
new file mode 100644
index 000000000000..7965cb896208
--- /dev/null
+++ b/test/Transforms/GlobalOpt/global-demotion.ll
@@ -0,0 +1,80 @@
+; RUN: opt -globalopt -S < %s | FileCheck %s
+
+@G1 = internal global i32 5
+@G2 = internal global i32 5
+@G3 = internal global i32 5
+@G4 = internal global i32 5
+@G5 = internal global i32 5
+
+; CHECK-LABEL: @test1
+define internal i32 @test1() norecurse {
+; CHECK-NOT: @G1
+ store i32 4, i32* @G1
+ %a = load i32, i32* @G1
+; CHECK: ret
+ ret i32 %a
+}
+
+; The load comes before the store which makes @G2 live before the call.
+; CHECK-LABEL: @test2
+define internal i32 @test2() norecurse {
+; CHECK-NOT: %G2
+ %a = load i32, i32* @G2
+ store i32 4, i32* @G2
+; CHECK: ret
+ ret i32 %a
+}
+
+; This global is indexed by a GEP - this makes it partial alias and we bail out.
+; FIXME: We don't actually have to bail out in this case.
+
+; CHECK-LABEL: @test3
+define internal i32 @test3() norecurse {
+; CHECK-NOT: %G3
+ %x = getelementptr i32,i32* @G3, i32 0
+ %a = load i32, i32* %x
+ store i32 4, i32* @G3
+; CHECK: ret
+ ret i32 %a
+}
+
+; The global is cast to a larger type and then loaded. The store only partially
+; covers the load, so we must not demote.
+
+; CHECK-LABEL: @test4
+define internal i32 @test4() norecurse {
+; CHECK-NOT: %G4
+ store i32 4, i32* @G4
+ %x = bitcast i32* @G4 to i64*
+ %a = load i64, i64* %x
+ %b = trunc i64 %a to i32
+; CHECK: ret
+ ret i32 %b
+}
+
+; The global is cast to a smaller type and then loaded. This one is fine.
+
+; CHECK-LABEL: @test5
+define internal i32 @test5() norecurse {
+; CHECK-NOT: @G5
+ store i32 4, i32* @G5
+ %x = bitcast i32* @G5 to i16*
+ %a = load i16, i16* %x
+ %b = zext i16 %a to i32
+; CHECK: ret
+ ret i32 %b
+}
+
+define i32 @main() norecurse {
+ %a = call i32 @test1()
+ %b = call i32 @test2()
+ %c = call i32 @test3()
+ %d = call i32 @test4()
+ %e = call i32 @test5()
+
+ %x = or i32 %a, %b
+ %y = or i32 %x, %c
+ %z = or i32 %y, %d
+ %w = or i32 %z, %e
+ ret i32 %w
+}
diff --git a/test/Transforms/GlobalOpt/invariant.group.barrier.ll b/test/Transforms/GlobalOpt/invariant.group.barrier.ll
new file mode 100644
index 000000000000..54d91d408019
--- /dev/null
+++ b/test/Transforms/GlobalOpt/invariant.group.barrier.ll
@@ -0,0 +1,79 @@
+; RUN: opt -S -globalopt < %s | FileCheck %s
+
+; This test is a hint as to what GlobalOpt could optimize and what it can't.
+; FIXME: @tmp and @tmp2 can be safely set to 42
+; CHECK: @tmp = global i32 0
+; CHECK: @tmp2 = global i32 0
+; CHECK: @tmp3 = global i32 0
+
+@tmp = global i32 0
+@tmp2 = global i32 0
+@tmp3 = global i32 0
+@ptrToTmp3 = global i32* null
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
+define i32 @TheAnswerToLifeTheUniverseAndEverything() {
+ ret i32 42
+}
+
+define void @_GLOBAL__I_a() {
+enter:
+ call void @_optimizable()
+ call void @_not_optimizable()
+ ret void
+}
+
+define void @_optimizable() {
+enter:
+ %valptr = alloca i32
+
+ %val = call i32 @TheAnswerToLifeTheUniverseAndEverything()
+ store i32 %val, i32* @tmp
+ store i32 %val, i32* %valptr
+
+ %0 = bitcast i32* %valptr to i8*
+ %barr = call i8* @llvm.invariant.group.barrier(i8* %0)
+ %1 = bitcast i8* %barr to i32*
+
+ %val2 = load i32, i32* %1
+ store i32 %val2, i32* @tmp2
+ ret void
+}
+
+; We can't step through invariant.group.barrier here, because that would change
+; this load in @usage_of_globals()
+; %val = load i32, i32* %ptrVal, !invariant.group !0
+; into
+; %val = load i32, i32* @tmp3, !invariant.group !0
+; and then we could assume %val and %val2 to be the same, which could be
+; false, because @changeTmp3ValAndCallBarrierInside() may change the value
+; of @tmp3.
+define void @_not_optimizable() {
+enter:
+ store i32 13, i32* @tmp3, !invariant.group !0
+
+ %0 = bitcast i32* @tmp3 to i8*
+ %barr = call i8* @llvm.invariant.group.barrier(i8* %0)
+ %1 = bitcast i8* %barr to i32*
+
+ store i32* %1, i32** @ptrToTmp3
+ store i32 42, i32* %1, !invariant.group !0
+
+ ret void
+}
+define void @usage_of_globals() {
+entry:
+ %ptrVal = load i32*, i32** @ptrToTmp3
+ %val = load i32, i32* %ptrVal, !invariant.group !0
+
+ call void @changeTmp3ValAndCallBarrierInside()
+ %val2 = load i32, i32* @tmp3, !invariant.group !0
+ ret void;
+}
+
+declare void @changeTmp3ValAndCallBarrierInside()
+
+declare i8* @llvm.invariant.group.barrier(i8*)
+
+!0 = !{!"something"}
diff --git a/test/Transforms/GlobalOpt/localize-constexpr.ll b/test/Transforms/GlobalOpt/localize-constexpr.ll
new file mode 100644
index 000000000000..3fa7db89b04b
--- /dev/null
+++ b/test/Transforms/GlobalOpt/localize-constexpr.ll
@@ -0,0 +1,28 @@
+; RUN: opt -S < %s -globalopt | FileCheck %s
+
+@G = internal global i32 42
+
+define i8 @f() norecurse {
+; CHECK-LABEL: @f
+; CHECK: alloca
+; CHECK-NOT: @G
+; CHECK: }
+ store i32 42, i32* @G
+ %a = load i8, i8* bitcast (i32* @G to i8*)
+ ret i8 %a
+}
+
+@H = internal global i32 42
+@Halias = alias i32, i32* @H
+
+; @H can't be localized because @Halias uses it, and @Halias can't be converted to an instruction.
+define i8 @g() norecurse {
+; CHECK-LABEL: @g
+; CHECK-NOT: alloca
+; CHECK: @H
+; CHECK: }
+ store i32 42, i32* @H
+ %a = load i8, i8* bitcast (i32* @H to i8*)
+ ret i8 %a
+}
+
diff --git a/test/Transforms/GlobalOpt/metadata.ll b/test/Transforms/GlobalOpt/metadata.ll
index fb60b66ab58e..152d58e6e320 100644
--- a/test/Transforms/GlobalOpt/metadata.ll
+++ b/test/Transforms/GlobalOpt/metadata.ll
@@ -5,7 +5,7 @@
; to that containing %G should likewise drop to null.
@G = internal global i8** null
-define i32 @main(i32 %argc, i8** %argv) {
+define i32 @main(i32 %argc, i8** %argv) norecurse {
; CHECK-LABEL: @main(
; CHECK: %G = alloca
store i8** %argv, i8*** @G
diff --git a/test/Transforms/GlobalOpt/tls.ll b/test/Transforms/GlobalOpt/tls.ll
index f3cb4a65704d..d010b96188f1 100644
--- a/test/Transforms/GlobalOpt/tls.ll
+++ b/test/Transforms/GlobalOpt/tls.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
+; RUN: opt -emulated-tls < %s -globalopt -S | FileCheck %s
declare void @wait()
declare void @signal()
diff --git a/test/Transforms/GlobalOpt/unnamed-addr.ll b/test/Transforms/GlobalOpt/unnamed-addr.ll
index 85ed829c7112..de436c62a347 100644
--- a/test/Transforms/GlobalOpt/unnamed-addr.ll
+++ b/test/Transforms/GlobalOpt/unnamed-addr.ll
@@ -12,7 +12,13 @@
; CHECK: @d = internal unnamed_addr constant [4 x i8] c"foo\00", align 1
; CHECK: @e = linkonce_odr global i32 0
+; CHECK: define internal fastcc void @used_internal() unnamed_addr {
+define internal void @used_internal() {
+ ret void
+}
+
define i32 @get_e() {
+ call void @used_internal()
%t = load i32, i32* @e
ret i32 %t
}
diff --git a/test/Transforms/IndVarSimplify/bec-cmp.ll b/test/Transforms/IndVarSimplify/bec-cmp.ll
new file mode 100644
index 000000000000..06a7d5ebe4dc
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/bec-cmp.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(i32* nocapture %a, i32* nocapture readonly %b, i32 signext %n) #0 {
+entry:
+
+; CHECK-LABEL: @foo
+
+ %cmp.10 = icmp sgt i32 %n, 0
+ br i1 %cmp.10, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge: ; preds = %for.inc
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry
+ ret void
+
+for.body: ; preds = %for.body.lr.ph, %for.inc
+ %i.011 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+ %cmp1 = icmp sgt i32 %i.011, %n
+ br i1 %cmp1, label %if.then, label %for.inc
+
+; CHECK-NOT: br i1 %cmp1, label %if.then, label %for.inc
+; CHECK: br i1 false, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %idxprom = sext i32 %i.011 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, 1
+ %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ store i32 %add, i32* %arrayidx3, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %inc = add nsw i32 %i.011, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/Transforms/IndVarSimplify/const_phi.ll b/test/Transforms/IndVarSimplify/const_phi.ll
new file mode 100644
index 000000000000..33dc5514d3cc
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/const_phi.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; PR25372
+; We can compute the expression of %phi0 and that is a SCEV
+; constant. However, instcombine can't deduce this, so we can
+; potentially end up trying to handle a constant when replacing
+; congruent IVs.
+
+; CHECK-LABEL: crash
+define void @crash() {
+entry:
+ br i1 false, label %not_taken, label %pre
+
+not_taken:
+ br label %pre
+
+pre:
+; %phi0.pre and %phi1.pre are evaluated by SCEV to constant 0.
+ %phi0.pre = phi i32 [ 0, %entry ], [ 2, %not_taken ]
+ %phi1.pre = phi i32 [ 0, %entry ], [ 1, %not_taken ]
+ br label %loop
+
+loop:
+; %phi0 and %phi1 are evaluated by SCEV to constant 0.
+ %phi0 = phi i32 [ 0, %loop ], [ %phi0.pre, %pre ]
+ %phi1 = phi i32 [ 0, %loop ], [ %phi1.pre, %pre ]
+ br i1 undef, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
index 4d14b3681c5d..612f01e3cade 100644
--- a/test/Transforms/IndVarSimplify/eliminate-comparison.ll
+++ b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
@@ -209,3 +209,351 @@ assert77: ; preds = %noassert68
unrolledend: ; preds = %forcond38
ret i32 0
}
+
+declare void @side_effect()
+
+define void @func_13(i32* %len.ptr) {
+; CHECK-LABEL: @func_13(
+ entry:
+ %len = load i32, i32* %len.ptr, !range !0
+ %len.sub.1 = add i32 %len, -1
+ %len.is.zero = icmp eq i32 %len, 0
+ br i1 %len.is.zero, label %leave, label %loop
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ call void @side_effect()
+ %iv.inc = add i32 %iv, 1
+ %iv.cmp = icmp ult i32 %iv, %len
+ br i1 %iv.cmp, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp ult i32 %iv, %len.sub.1
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_14(i32* %len.ptr) {
+; CHECK-LABEL: @func_14(
+ entry:
+ %len = load i32, i32* %len.ptr, !range !0
+ %len.sub.1 = add i32 %len, -1
+ %len.is.zero = icmp eq i32 %len, 0
+ %len.is.int_min = icmp eq i32 %len, 2147483648
+ %no.entry = or i1 %len.is.zero, %len.is.int_min
+ br i1 %no.entry, label %leave, label %loop
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ call void @side_effect()
+ %iv.inc = add i32 %iv, 1
+ %iv.cmp = icmp slt i32 %iv, %len
+ br i1 %iv.cmp, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv, %len.sub.1
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_15(i32* %len.ptr) {
+; CHECK-LABEL: @func_15(
+ entry:
+ %len = load i32, i32* %len.ptr, !range !0
+ %len.add.1 = add i32 %len, 1
+ %len.add.1.is.zero = icmp eq i32 %len.add.1, 0
+ br i1 %len.add.1.is.zero, label %leave, label %loop
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ call void @side_effect()
+ %iv.inc = add i32 %iv, 1
+ %iv.cmp = icmp ult i32 %iv, %len.add.1
+ br i1 %iv.cmp, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp ult i32 %iv, %len
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_16(i32* %len.ptr) {
+; CHECK-LABEL: @func_16(
+ entry:
+ %len = load i32, i32* %len.ptr, !range !0
+ %len.add.5 = add i32 %len, 5
+ %entry.cond.0 = icmp slt i32 %len, 2147483643
+ %entry.cond.1 = icmp slt i32 4, %len.add.5
+ %entry.cond = and i1 %entry.cond.0, %entry.cond.1
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ call void @side_effect()
+ %iv.inc = add i32 %iv, 1
+ %iv.add.4 = add i32 %iv, 4
+ %iv.cmp = icmp slt i32 %iv.add.4, %len.add.5
+ br i1 %iv.cmp, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv, %len
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_17(i32* %len.ptr) {
+; CHECK-LABEL: @func_17(
+ entry:
+ %len = load i32, i32* %len.ptr
+ %len.add.5 = add i32 %len, -5
+ %entry.cond.0 = icmp slt i32 %len, 2147483653 ;; 2147483653 == INT_MIN - (-5)
+ %entry.cond.1 = icmp slt i32 -6, %len.add.5
+ %entry.cond = and i1 %entry.cond.0, %entry.cond.1
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv.2 = phi i32 [ 0, %entry ], [ %iv.2.inc, %be ]
+ %iv = phi i32 [ -6, %entry ], [ %iv.inc, %be ]
+ call void @side_effect()
+ %iv.inc = add i32 %iv, 1
+ %iv.2.inc = add i32 %iv.2, 1
+ %iv.cmp = icmp slt i32 %iv, %len.add.5
+
+; Deduces {-5,+,1} s< (-5 + %len) from {0,+,1} < %len
+; since %len s< INT_MIN - (-5) from the entry condition
+
+; CHECK: br i1 true, label %be, label %leave
+ br i1 %iv.cmp, label %be, label %leave
+
+ be:
+; CHECK: be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv.2, %len
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define i1 @func_18(i16* %tmp20, i32* %len.addr) {
+; CHECK-LABEL: @func_18(
+entry:
+ %len = load i32, i32* %len.addr, !range !0
+ %tmp18 = icmp eq i32 %len, 0
+ br i1 %tmp18, label %bb2, label %bb0.preheader
+
+bb0.preheader:
+ br label %bb0
+
+bb0:
+; CHECK: bb0:
+ %var_0.in = phi i32 [ %var_0, %bb1 ], [ %len, %bb0.preheader ]
+ %var_1 = phi i32 [ %tmp30, %bb1 ], [ 0, %bb0.preheader ]
+ %var_0 = add nsw i32 %var_0.in, -1
+ %tmp23 = icmp ult i32 %var_1, %len
+; CHECK: br i1 true, label %stay, label %bb2.loopexit
+ br i1 %tmp23, label %stay, label %bb2
+
+stay:
+; CHECK: stay:
+ %tmp25 = getelementptr inbounds i16, i16* %tmp20, i32 %var_1
+ %tmp26 = load i16, i16* %tmp25
+ %tmp29 = icmp eq i16 %tmp26, 0
+ br i1 %tmp29, label %bb1, label %bb2
+
+bb1:
+ %tmp30 = add i32 %var_1, 1
+ %tmp31 = icmp eq i32 %var_0, 0
+ br i1 %tmp31, label %bb3, label %bb0
+
+bb2:
+ ret i1 false
+
+bb3:
+ ret i1 true
+}
+
+define void @func_19(i32* %length.ptr) {
+; CHECK-LABEL: @func_19(
+ entry:
+ %length = load i32, i32* %length.ptr, !range !0
+ %length.is.nonzero = icmp ne i32 %length, 0
+ br i1 %length.is.nonzero, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ %iv.inc = add i32 %iv, 1
+ %range.check = icmp ult i32 %iv, %length
+ br i1 %range.check, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave.loopexit
+; CHECK: be:
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv.inc, %length
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_20(i32* %length.ptr) {
+; Like @func_19, but %length is no longer provably positive, so
+; %range.check cannot be proved to be always true.
+
+; CHECK-LABEL: @func_20(
+ entry:
+ %length = load i32, i32* %length.ptr
+ %length.is.nonzero = icmp ne i32 %length, 0
+ br i1 %length.is.nonzero, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ %iv.inc = add i32 %iv, 1
+ %range.check = icmp ult i32 %iv, %length
+ br i1 %range.check, label %be, label %leave
+; CHECK: br i1 %range.check, label %be, label %leave.loopexit
+; CHECK: be:
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv.inc, %length
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_21(i32* %length.ptr) {
+; CHECK-LABEL: @func_21(
+
+; This checks that the backedge condition, (I + 1) < Length - 1 implies
+; (I + 1) < Length
+ entry:
+ %length = load i32, i32* %length.ptr, !range !0
+ %lim = sub i32 %length, 1
+ %entry.cond = icmp sgt i32 %length, 1
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ %iv.inc = add i32 %iv, 1
+ %range.check = icmp slt i32 %iv, %length
+ br i1 %range.check, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave.loopexit
+; CHECK: be:
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv.inc, %lim
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_22(i32* %length.ptr) {
+; CHECK-LABEL: @func_22(
+
+; This checks that the backedge condition, (I + 1) <= Length - 1 implies
+; (I + 1) <= Length
+ entry:
+ %length = load i32, i32* %length.ptr, !range !0
+ %lim = sub i32 %length, 1
+ %entry.cond = icmp sgt i32 %length, 1
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ %iv.inc = add i32 %iv, 1
+ %range.check = icmp sle i32 %iv, %length
+ br i1 %range.check, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave.loopexit
+; CHECK: be:
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp sle i32 %iv.inc, %lim
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_23(i32* %length.ptr) {
+; CHECK-LABEL: @func_23(
+ entry:
+ %length = load i32, i32* %length.ptr, !range !0
+ %entry.cond = icmp ult i32 4, %length
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ 4, %entry ], [ %iv.inc, %be ]
+ %iv.inc = add i32 %iv, 1
+ %range.check = icmp slt i32 %iv, %length
+ br i1 %range.check, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave.loopexit
+; CHECK: be:
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp slt i32 %iv.inc, %length
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+define void @func_24(i32* %init.ptr) {
+; CHECK-LABEL: @func_24(
+ entry:
+ %init = load i32, i32* %init.ptr, !range !0
+ %entry.cond = icmp ugt i32 %init, 4
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i32 [ %init, %entry ], [ %iv.dec, %be ]
+ %iv.dec = add i32 %iv, -1
+ %range.check = icmp sgt i32 %iv, 4
+ br i1 %range.check, label %be, label %leave
+; CHECK: br i1 true, label %be, label %leave.loopexit
+; CHECK: be:
+
+ be:
+ call void @side_effect()
+ %be.cond = icmp sgt i32 %iv.dec, 4
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
+
+!0 = !{i32 0, i32 2147483647}
diff --git a/test/Transforms/IndVarSimplify/iv-widen.ll b/test/Transforms/IndVarSimplify/iv-widen.ll
index 464b03ce5595..ccf9fa0aa0ac 100644
--- a/test/Transforms/IndVarSimplify/iv-widen.ll
+++ b/test/Transforms/IndVarSimplify/iv-widen.ll
@@ -6,7 +6,7 @@ target datalayout = "n8:16:32:64"
target triple = "x86_64-apple-darwin"
-; CHECK-LABEL: @sloop
+; CHECK-LABEL: @loop_0
; CHECK-LABEL: B18:
; Only one phi now.
; CHECK: phi
@@ -16,7 +16,7 @@ target triple = "x86_64-apple-darwin"
; One trunc for the dummy() call.
; CHECK-LABEL: exit24:
; CHECK: trunc i64 {{.*}}lcssa.wide to i32
-define void @sloop(i32* %a) {
+define void @loop_0(i32* %a) {
Prologue:
br i1 undef, label %B18, label %B6
@@ -41,4 +41,30 @@ exit24: ; preds = %B18
unreachable
}
+define void @loop_1(i32 %lim) {
+; CHECK-LABEL: @loop_1(
+ entry:
+ %entry.cond = icmp ne i32 %lim, 0
+ br i1 %entry.cond, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+; CHECK: %indvars.iv = phi i64 [ 1, %loop.preheader ], [ %indvars.iv.next, %loop ]
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: [[IV_INC:%[^ ]+]] = add nsw i64 %indvars.iv, -1
+; CHECK: call void @dummy.i64(i64 [[IV_INC]])
+
+ %iv = phi i32 [ 1, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add i32 %iv, 1
+ %iv.inc.sub = add i32 %iv, -1
+ %iv.inc.sub.zext = zext i32 %iv.inc.sub to i64
+ call void @dummy.i64(i64 %iv.inc.sub.zext)
+ %be.cond = icmp ult i32 %iv.inc, %lim
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
+
declare void @dummy(i32)
+declare void @dummy.i64(i64)
diff --git a/test/Transforms/IndVarSimplify/loop-invariant-conditions.ll b/test/Transforms/IndVarSimplify/loop-invariant-conditions.ll
new file mode 100644
index 000000000000..eee321da2395
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/loop-invariant-conditions.ll
@@ -0,0 +1,279 @@
+; RUN: opt -S -indvars %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test1(i64 %start) {
+; CHECK-LABEL: @test1
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+; CHECK: %cmp1 = icmp slt i64 %start, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test2(i64 %start) {
+; CHECK-LABEL: @test2
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+; CHECK: %cmp1 = icmp sle i64 %start, -1
+ %cmp1 = icmp sle i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+; As long as the test dominates the backedge, we're good
+define void @test3(i64 %start) {
+; CHECK-LABEL: @test3
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+ br i1 %cmp, label %backedge, label %for.end
+
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+; CHECK: %cmp1 = icmp slt i64 %start, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test4(i64 %start) {
+; CHECK-LABEL: @test4
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+ br i1 %cmp, label %backedge, label %for.end
+
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+; CHECK: %cmp1 = icmp sgt i64 %start, -1
+ %cmp1 = icmp sgt i64 %indvars.iv, -1
+ br i1 %cmp1, label %loop, label %for.end
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test5(i64 %start) {
+; CHECK-LABEL: @test5
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nuw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+ br i1 %cmp, label %backedge, label %for.end
+
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+; CHECK: %cmp1 = icmp ugt i64 %start, 100
+ %cmp1 = icmp ugt i64 %indvars.iv, 100
+ br i1 %cmp1, label %loop, label %for.end
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test6(i64 %start) {
+; CHECK-LABEL: @test6
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nuw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+ br i1 %cmp, label %backedge, label %for.end
+
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+; CHECK: %cmp1 = icmp ult i64 %start, 100
+ %cmp1 = icmp ult i64 %indvars.iv, 100
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test7(i64 %start, i64* %inc_ptr) {
+; CHECK-LABEL: @test7
+entry:
+ %inc = load i64, i64* %inc_ptr, !range !0
+ %ok = icmp sge i64 %inc, 0
+ br i1 %ok, label %loop, label %for.end
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, %inc
+; CHECK: %cmp1 = icmp slt i64 %start, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+!0 = !{i64 0, i64 100}
+
+; Negative test - we can't show that the internal branch executes, so we can't
+; fold the test to a loop invariant one.
+define void @test1_neg(i64 %start) {
+; CHECK-LABEL: @test1_neg
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+ br i1 %cmp, label %backedge, label %skip
+skip:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+; CHECK: %cmp1 = icmp slt i64 %indvars.iv, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %backedge
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+ br label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+; Slightly subtle version of @test4 where the icmp dominates the backedge,
+; but the exit branch doesn't.
+define void @test2_neg(i64 %start) {
+; CHECK-LABEL: @test2_neg
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+; CHECK: %cmp1 = icmp slt i64 %indvars.iv, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp, label %backedge, label %skip
+skip:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+ br i1 %cmp1, label %for.end, label %backedge
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+ br label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+; The branch has to exit the loop if the condition is true
+define void @test3_neg(i64 %start) {
+; CHECK-LABEL: @test3_neg
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+; CHECK: %cmp1 = icmp slt i64 %indvars.iv, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %loop, label %for.end
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test4_neg(i64 %start) {
+; CHECK-LABEL: @test4_neg
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %backedge ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, 1
+ %cmp = icmp eq i64 %indvars.iv.next, 25
+ br i1 %cmp, label %backedge, label %for.end
+
+backedge:
+ ; prevent flattening, needed to make sure we're testing what we intend
+ call void @foo()
+; CHECK: %cmp1 = icmp sgt i64 %indvars.iv, -1
+ %cmp1 = icmp sgt i64 %indvars.iv, -1
+
+; %cmp1 can be made loop invariant only if the branch below goes to
+; the header when %cmp1 is true.
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test5_neg(i64 %start, i64 %inc) {
+; CHECK-LABEL: @test5_neg
+entry:
+ br label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, %inc
+; CHECK: %cmp1 = icmp slt i64 %indvars.iv, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+define void @test8(i64 %start, i64* %inc_ptr) {
+; CHECK-LABEL: @test8
+entry:
+ %inc = load i64, i64* %inc_ptr, !range !1
+ %ok = icmp sge i64 %inc, 0
+ br i1 %ok, label %loop, label %for.end
+
+loop:
+ %indvars.iv = phi i64 [ %start, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nsw i64 %indvars.iv, %inc
+; CHECK: %cmp1 = icmp slt i64 %indvars.iv, -1
+ %cmp1 = icmp slt i64 %indvars.iv, -1
+ br i1 %cmp1, label %for.end, label %loop
+
+for.end: ; preds = %if.end, %entry
+ ret void
+}
+
+!1 = !{i64 -1, i64 100}
+
+
+declare void @foo()
diff --git a/test/Transforms/IndVarSimplify/pr24356.ll b/test/Transforms/IndVarSimplify/pr24356.ll
new file mode 100644
index 000000000000..eac4204c0e16
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr24356.ll
@@ -0,0 +1,63 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind ssp uwtable
+define void @fn1() {
+; CHECK-LABEL: @fn1(
+bb:
+ br label %bb4.preheader
+
+bb4.preheader: ; preds = %bb, %bb16
+; CHECK-LABEL: bb4.preheader:
+ %b.03 = phi i8 [ 0, %bb ], [ %tmp17, %bb16 ]
+; CHECK: %tmp9 = icmp ugt i8 %b.03, 1
+; CHECK-NOT: %tmp9 = icmp ugt i8 0, 1
+
+ %tmp9 = icmp ugt i8 %b.03, 1
+ br i1 %tmp9, label %bb4.preheader.bb18.loopexit.split_crit_edge, label %bb4.preheader.bb4.preheader.split_crit_edge
+
+bb4.preheader.bb4.preheader.split_crit_edge: ; preds = %bb4.preheader
+ br label %bb4.preheader.split
+
+bb4.preheader.bb18.loopexit.split_crit_edge: ; preds = %bb4.preheader
+ store i32 0, i32* @a, align 4
+ br label %bb18.loopexit.split
+
+bb4.preheader.split: ; preds = %bb4.preheader.bb4.preheader.split_crit_edge
+ br label %bb7
+
+bb4: ; preds = %bb7
+ %tmp6 = icmp slt i32 %storemerge2, 0
+ br i1 %tmp6, label %bb7, label %bb16
+
+bb7: ; preds = %bb4.preheader.split, %bb4
+ %storemerge2 = phi i32 [ 0, %bb4.preheader.split ], [ %tmp14, %bb4 ]
+ %tmp14 = add nsw i32 %storemerge2, 1
+ br i1 false, label %bb18.loopexit, label %bb4
+
+bb16: ; preds = %bb4
+ %tmp14.lcssa5 = phi i32 [ %tmp14, %bb4 ]
+ %tmp17 = add i8 %b.03, -1
+ %tmp2 = icmp eq i8 %tmp17, -2
+ br i1 %tmp2, label %bb18.loopexit1, label %bb4.preheader
+
+bb18.loopexit: ; preds = %bb7
+ br label %bb18.loopexit.split
+
+bb18.loopexit.split: ; preds = %bb4.preheader.bb18.loopexit.split_crit_edge, %bb18.loopexit
+ br label %bb18
+
+bb18.loopexit1: ; preds = %bb16
+ %tmp14.lcssa5.lcssa = phi i32 [ %tmp14.lcssa5, %bb16 ]
+ store i32 %tmp14.lcssa5.lcssa, i32* @a, align 4
+ br label %bb18
+
+bb18: ; preds = %bb18.loopexit1, %bb18.loopexit.split
+ ret void
+}
+
+declare void @abort()
diff --git a/test/Transforms/IndVarSimplify/pr24783.ll b/test/Transforms/IndVarSimplify/pr24783.ll
new file mode 100644
index 000000000000..637cb1e196c5
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr24783.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @f(i32* %end.s, i8** %loc, i32 %p) {
+; CHECK-LABEL: @f(
+entry:
+; CHECK: [[P_SEXT:%[0-9a-z]+]] = sext i32 %p to i64
+; CHECK: [[END:%[0-9a-z]+]] = getelementptr i32, i32* %end.s, i64 [[P_SEXT]]
+
+ %end = getelementptr inbounds i32, i32* %end.s, i32 %p
+ %init = bitcast i32* %end.s to i8*
+ br label %while.body.i
+
+while.body.i:
+ %ptr = phi i8* [ %ptr.inc, %while.body.i ], [ %init, %entry ]
+ %ptr.inc = getelementptr inbounds i8, i8* %ptr, i8 1
+ %ptr.inc.cast = bitcast i8* %ptr.inc to i32*
+ %cmp.i = icmp eq i32* %ptr.inc.cast, %end
+ br i1 %cmp.i, label %loop.exit, label %while.body.i
+
+loop.exit:
+; CHECK: loop.exit:
+; CHECK: [[END_BCASTED:%[a-z0-9]+]] = bitcast i32* %scevgep to i8*
+; CHECK: store i8* [[END_BCASTED]], i8** %loc
+ %ptr.inc.lcssa = phi i8* [ %ptr.inc, %while.body.i ]
+ store i8* %ptr.inc.lcssa, i8** %loc
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr24804.ll b/test/Transforms/IndVarSimplify/pr24804.ll
new file mode 100644
index 000000000000..6f89481853ad
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr24804.ll
@@ -0,0 +1,25 @@
+; RUN: opt -indvars -loop-idiom -loop-deletion -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Checking for a crash
+
+define void @f(i32* %a) {
+; CHECK-LABEL: @f(
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %for.cond, %entry
+ %iv = phi i32 [ 0, %entry ], [ %add, %for.inc ], [ %iv, %for.cond ]
+ %add = add nsw i32 %iv, 1
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ br i1 undef, label %for.cond, label %for.inc
+
+for.inc: ; preds = %for.cond
+ br i1 undef, label %for.cond, label %for.end
+
+for.end: ; preds = %for.inc
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr24952.ll b/test/Transforms/IndVarSimplify/pr24952.ll
new file mode 100644
index 000000000000..c430cae98f58
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr24952.ll
@@ -0,0 +1,27 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+declare void @use(i1)
+
+define void @f() {
+; CHECK-LABEL: @f(
+ entry:
+ %x = alloca i32
+ %y = alloca i32
+ br label %loop
+
+ loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add i32 %iv, 1
+
+ %x.gep = getelementptr i32, i32* %x, i32 %iv
+ %eql = icmp eq i32* %x.gep, %y
+; CHECK-NOT: @use(i1 true)
+ call void @use(i1 %eql)
+
+ ; %be.cond deliberately 'false' -- we want the trip count to be 0.
+ %be.cond = icmp ult i32 %iv, 0
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr24956.ll b/test/Transforms/IndVarSimplify/pr24956.ll
new file mode 100644
index 000000000000..58688912cc37
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr24956.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; Check that this test does not crash.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+define void @re_update_line(i8* %new) {
+; CHECK: @re_update_line(
+entry:
+ %incdec.ptr6 = getelementptr inbounds i8, i8* %new, i64 1
+ br label %for.cond.11.preheader
+
+for.cond.11.preheader: ; preds = %for.inc.26, %entry
+ %n.154 = phi i8* [ %new, %entry ], [ %incdec.ptr27, %for.inc.26 ]
+ %cmp12.52 = icmp ult i8* %n.154, %incdec.ptr6
+ br i1 %cmp12.52, label %land.rhs.16.lr.ph, label %for.inc.26
+
+land.rhs.16.lr.ph: ; preds = %for.cond.11.preheader
+ br label %land.rhs.16
+
+for.cond.11: ; preds = %land.rhs.16
+ %incdec.ptr24 = getelementptr inbounds i8, i8* %p.053, i64 1
+ %cmp12 = icmp ult i8* %p.053, %new
+ br i1 %cmp12, label %land.rhs.16, label %for.inc.26
+
+land.rhs.16: ; preds = %for.cond.11, %land.rhs.16.lr.ph
+ %p.053 = phi i8* [ %n.154, %land.rhs.16.lr.ph ], [ %incdec.ptr24, %for.cond.11 ]
+ br i1 undef, label %for.cond.11, label %for.inc.26
+
+for.inc.26: ; preds = %land.rhs.16, %for.cond.11, %for.cond.11.preheader
+ %incdec.ptr27 = getelementptr inbounds i8, i8* %n.154, i64 1
+ br i1 false, label %for.cond.11.preheader, label %for.end.28
+
+for.end.28: ; preds = %for.inc.26
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr25047.ll b/test/Transforms/IndVarSimplify/pr25047.ll
new file mode 100644
index 000000000000..dc39a78c7eb9
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25047.ll
@@ -0,0 +1,49 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define void @fn1(i1 %c0, i1 %c1) {
+; CHECK-LABEL: @fn1(
+entry:
+ br i1 %c0, label %for.end.34, label %for.cond.1thread-pre-split
+
+for.cond.loopexit: ; preds = %for.end.29, %for.end.7
+ %f.lcssa = phi i32 [ %f.1, %for.end.29 ], [ %f.1, %for.end.7 ]
+ br i1 %c1, label %for.end.34, label %for.cond.1thread-pre-split
+
+for.cond.1thread-pre-split: ; preds = %for.cond.loopexit, %entry
+ %f.047 = phi i32 [ %f.lcssa, %for.cond.loopexit ], [ 0, %entry ]
+ br label %for.cond.1
+
+for.cond.1: ; preds = %for.cond.1, %for.cond.1thread-pre-split
+ br i1 %c1, label %for.cond.4, label %for.cond.1
+
+for.cond.4: ; preds = %for.end.29, %for.cond.1
+ %f.1 = phi i32 [ 0, %for.end.29 ], [ %f.047, %for.cond.1 ]
+ br label %for.cond.5
+
+for.cond.5: ; preds = %for.cond.5, %for.cond.4
+ %h.0 = phi i32 [ 0, %for.cond.4 ], [ %inc, %for.cond.5 ]
+ %cmp = icmp slt i32 %h.0, 1
+ %inc = add nsw i32 %h.0, 1
+ br i1 %cmp, label %for.cond.5, label %for.end.7
+
+for.end.7: ; preds = %for.cond.5
+ %g.lcssa = phi i32 [ %h.0, %for.cond.5 ]
+ %tobool10 = icmp eq i32 %g.lcssa, 0
+ br i1 %tobool10, label %for.end.8, label %for.cond.loopexit
+
+for.end.8: ; preds = %for.end.7
+ br i1 %c1, label %for.cond.19, label %for.end.29
+
+for.cond.19: ; preds = %for.cond.19, %for.end.8
+ br label %for.cond.19
+
+for.end.29: ; preds = %for.end.8
+ %tobool30 = icmp eq i32 %f.1, 0
+ br i1 %tobool30, label %for.cond.4, label %for.cond.loopexit
+
+for.end.34: ; preds = %for.cond.loopexit, %entry
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr25051.ll b/test/Transforms/IndVarSimplify/pr25051.ll
new file mode 100644
index 000000000000..a02d539a66dd
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25051.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define i32 @somefunc(double* %arr) {
+; CHECK-LABEL: @somefunc(
+entry:
+ br label %for.cond.1.preheader
+
+for.cond.1.preheader: ; preds = %for.inc.9, %entry
+ %index3.013 = phi i32 [ 0, %entry ], [ %index3.1.lcssa, %for.inc.9 ]
+ %index.012 = phi i32 [ 0, %entry ], [ %inc10, %for.inc.9 ]
+ %cmp2.9 = icmp sgt i32 %index.012, 0
+ br i1 %cmp2.9, label %for.body.3.lr.ph, label %for.inc.9
+
+for.body.3.lr.ph: ; preds = %for.cond.1.preheader
+ %idxprom5 = sext i32 %index.012 to i64
+ br label %for.body.3
+
+for.body.3: ; preds = %for.body.3, %for.body.3.lr.ph
+ %index3.111 = phi i32 [ %index3.013, %for.body.3.lr.ph ], [ %inc, %for.body.3 ]
+ %index2.010 = phi i32 [ 0, %for.body.3.lr.ph ], [ %inc8, %for.body.3 ]
+ %inc = add nsw i32 %index3.111, 1
+ %idxprom = sext i32 %index3.111 to i64
+ %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
+ %idxprom4 = sext i32 %index2.010 to i64
+ %inc8 = add nsw i32 %index2.010, 1
+ %cmp2 = icmp slt i32 %inc8, %index.012
+ br i1 %cmp2, label %for.body.3, label %for.inc.9.loopexit
+
+for.inc.9.loopexit: ; preds = %for.body.3
+ %inc.lcssa = phi i32 [ %inc, %for.body.3 ]
+ br label %for.inc.9
+
+for.inc.9: ; preds = %for.inc.9.loopexit, %for.cond.1.preheader
+ %index3.1.lcssa = phi i32 [ %index3.013, %for.cond.1.preheader ], [ %inc.lcssa, %for.inc.9.loopexit ]
+ %inc10 = add nsw i32 %index.012, 1
+ %cmp = icmp slt i32 %inc10, 10
+ br i1 %cmp, label %for.cond.1.preheader, label %for.end.11
+
+for.end.11: ; preds = %for.inc.9
+ ret i32 1
+}
diff --git a/test/Transforms/IndVarSimplify/pr25060.ll b/test/Transforms/IndVarSimplify/pr25060.ll
new file mode 100644
index 000000000000..25863fff2d36
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25060.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+define i16 @fn1() {
+; CHECK-LABEL: @fn1(
+entry:
+ br label %bb1
+
+bb1:
+ %i = phi i16 [ 0, %entry ], [ 1, %bb1 ]
+ %storemerge = phi i16 [ %storemerge2, %bb1 ], [ 0, %entry ]
+ %storemerge2 = phi i16 [ 10, %entry ], [ 200, %bb1 ]
+ %tmp10 = icmp eq i16 %i, 1
+ br i1 %tmp10, label %bb5, label %bb1
+
+bb5:
+ %storemerge.lcssa = phi i16 [ %storemerge, %bb1 ]
+; CHECK: ret i16 10
+ ret i16 %storemerge.lcssa
+}
+
+define i16 @fn2() {
+; CHECK-LABEL: @fn2(
+entry:
+ br label %bb1
+
+bb1:
+ %canary = phi i16 [ 0, %entry ], [ %canary.inc, %bb1 ]
+ %i = phi i16 [ 0, %entry ], [ %storemerge, %bb1 ]
+ %storemerge = phi i16 [ 0, %bb1 ], [ 10, %entry ]
+ %canary.inc = add i16 %canary, 1
+ %_tmp10 = icmp eq i16 %i, 10
+ br i1 %_tmp10, label %bb5, label %bb1
+
+bb5:
+; CHECK: ret i16 1
+ ret i16 %canary
+}
diff --git a/test/Transforms/IndVarSimplify/pr25360.ll b/test/Transforms/IndVarSimplify/pr25360.ll
new file mode 100644
index 000000000000..9f6df7051ea8
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25360.ll
@@ -0,0 +1,33 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+
+; Ensure that -indvars does not crash on this input.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f() {
+; CHECK-LABEL: @f(
+entry:
+ br label %for.end
+
+for.condt: ; preds = %for.end
+ br i1 true, label %for.cond.0, label %for.end
+
+for.end: ; preds = %for.condt, %entry
+ %inc = select i1 undef, i32 2, i32 1
+ br i1 false, label %for.condt, label %for.cond.0
+
+for.cond.0: ; preds = %for.end, %for.condt
+ %init = phi i32 [ 0, %for.condt ], [ %inc, %for.end ]
+ br i1 true, label %for.end.13, label %for.body.9
+
+for.body.9: ; preds = %for.body.9, %for.cond.0
+ %p1.addr.22 = phi i32 [ %inc10, %for.body.9 ], [ %init, %for.cond.0 ]
+ %inc10 = add i32 %p1.addr.22, 1
+ br i1 true, label %for.end.13, label %for.body.9
+
+for.end.13: ; preds = %for.body.9, %for.cond.0
+ %p1.addr.2.lcssa = phi i32 [ %inc10, %for.body.9 ], [ %init, %for.cond.0 ]
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr25421.ll b/test/Transforms/IndVarSimplify/pr25421.ll
new file mode 100644
index 000000000000..efb71f9c3039
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25421.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+declare void @use(i1)
+
+define void @f(i32 %x) {
+; CHECK-LABEL: @f(
+ entry:
+ %conv = sext i32 %x to i64
+ %sub = add i64 %conv, -1
+ %ec = icmp sgt i32 %x, 0
+ br i1 %ec, label %loop, label %leave
+
+ loop:
+; CHECK: loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add i64 %iv, 1
+ %cmp = icmp slt i64 %iv, %sub
+ call void @use(i1 %cmp)
+; CHECK: call void @use(i1 %cmp)
+; CHECK-NOT: call void @use(i1 true)
+
+ %be.cond = icmp slt i64 %iv.inc, %conv
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/pr25578.ll b/test/Transforms/IndVarSimplify/pr25578.ll
new file mode 100644
index 000000000000..bc648b517bbe
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25578.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @foo
+define void @foo() {
+entry:
+ br label %L1_header
+
+L1_header:
+ br label %L2_header
+
+; CHECK: L2_header:
+; CHECK: %[[INDVAR:.*]] = phi i64
+; CHECK: %[[TRUNC:.*]] = trunc i64 %[[INDVAR]] to i32
+L2_header:
+ %i = phi i32 [ 0, %L1_header ], [ %i_next, %L2_latch ]
+ %i_prom = sext i32 %i to i64
+ br label %L3_header
+
+L3_header:
+ br i1 undef, label %L3_latch, label %L2_exiting_1
+
+L3_latch:
+ br i1 undef, label %L3_header, label %L2_exiting_2
+
+L2_exiting_1:
+ br i1 undef, label %L2_latch, label %L1_latch
+
+L2_exiting_2:
+ br i1 undef, label %L2_latch, label %L1_latch
+
+L2_latch:
+ %i_next = add nsw i32 %i, 1
+ br label %L2_header
+
+L1_latch:
+; CHECK: L1_latch:
+; CHECK: %i_lcssa = phi i32 [ %[[TRUNC]], %L2_exiting_1 ], [ %[[TRUNC]], %L2_exiting_2 ]
+
+ %i_lcssa = phi i32 [ %i, %L2_exiting_1 ], [ %i, %L2_exiting_2 ]
+ br i1 undef, label %exit, label %L1_header
+
+exit:
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/tripcount_infinite.ll b/test/Transforms/IndVarSimplify/tripcount_infinite.ll
index 0495b50c3e48..658598d3b7e4 100644
--- a/test/Transforms/IndVarSimplify/tripcount_infinite.ll
+++ b/test/Transforms/IndVarSimplify/tripcount_infinite.ll
@@ -1,38 +1,45 @@
; These tests have an infinite trip count. We obviously shouldn't remove the
; loops! :)
;
-; RUN: opt < %s -indvars -adce -simplifycfg -S | grep icmp | wc -l > %t2
-; RUN: llvm-as < %s | llvm-dis | grep icmp | wc -l > %t1
-; RUN: diff %t1 %t2
+; RUN: opt < %s -indvars -adce -simplifycfg -S | FileCheck %s
;; test for (i = 1; i != 100; i += 2)
define i32 @infinite_linear() {
+; CHECK-LABEL: @infinite_linear(
entry:
br label %loop
loop: ; preds = %loop, %entry
+; CHECK-LABEL: loop:
%i = phi i32 [ 1, %entry ], [ %i.next, %loop ] ; <i32> [#uses=3]
%i.next = add i32 %i, 2 ; <i32> [#uses=1]
%c = icmp ne i32 %i, 100 ; <i1> [#uses=1]
+; CHECK: icmp
+; CHECK: br
br i1 %c, label %loop, label %loopexit
loopexit: ; preds = %loop
+; CHECK-LABEL: loopexit:
ret i32 %i
}
;; test for (i = 1; i*i != 63; ++i)
define i32 @infinite_quadratic() {
+; CHECK-LABEL: @infinite_quadratic(
entry:
br label %loop
loop: ; preds = %loop, %entry
+; CHECK-LABEL: loop:
%i = phi i32 [ 1, %entry ], [ %i.next, %loop ] ; <i32> [#uses=4]
%isquare = mul i32 %i, %i ; <i32> [#uses=1]
%i.next = add i32 %i, 1 ; <i32> [#uses=1]
%c = icmp ne i32 %isquare, 63 ; <i1> [#uses=1]
+; CHECK: icmp
+; CHECK: br
br i1 %c, label %loop, label %loopexit
loopexit: ; preds = %loop
+; CHECK-LABEL: loopexit:
ret i32 %i
}
-
diff --git a/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/test/Transforms/IndVarSimplify/widen-loop-comp.ll
index eb81ceb700b0..b87cd0550192 100644
--- a/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ b/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -193,3 +193,163 @@ for.body:
for.end:
ret i32 %sum.0
}
+
+define i32 @test6(i32* %a, i32 %b) {
+; CHECK-LABEL: @test6(
+; CHECK: [[B_SEXT:%[a-z0-9]+]] = sext i32 %b to i64
+; CHECK: for.cond:
+; CHECK: icmp sle i64 %indvars.iv, [[B_SEXT]]
+
+entry:
+ br label %for.cond
+
+for.cond:
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp sle i32 %i.0, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = zext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret i32 %sum.0
+}
+
+define i32 @test7(i32* %a, i32 %b) {
+; CHECK-LABEL: @test7(
+; CHECK: [[B_ZEXT:%[a-z0-9]+]] = zext i32 %b to i64
+; CHECK: [[B_SEXT:%[a-z0-9]+]] = sext i32 %b to i64
+; CHECK: for.cond:
+; CHECK: icmp ule i64 %indvars.iv, [[B_ZEXT]]
+; CHECK: for.body:
+; CHECK: icmp sle i64 %indvars.iv, [[B_SEXT]]
+
+entry:
+ br label %for.cond
+
+for.cond:
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp ule i32 %i.0, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = sext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ %cmp2 = icmp sle i32 %i.0, %b
+ br i1 %cmp2, label %for.cond, label %for.end
+
+for.end:
+ ret i32 %sum.0
+}
+
+define i32 @test8(i32* %a, i32 %b, i32 %init) {
+; CHECK-LABEL: @test8(
+; CHECK: [[INIT_SEXT:%[a-z0-9]+]] = sext i32 %init to i64
+; CHECK: [[B_ZEXT:%[a-z0-9]+]] = zext i32 %b to i64
+; CHECK: for.cond:
+; Note: %indvars.iv is the sign extension of %i.0
+; CHECK: %indvars.iv = phi i64 [ [[INIT_SEXT]], %for.cond.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK: icmp ule i64 %indvars.iv, [[B_ZEXT]]
+
+entry:
+ %e = icmp sgt i32 %init, 0
+ br i1 %e, label %for.cond, label %leave
+
+for.cond:
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %i.0 = phi i32 [ %init, %entry ], [ %inc, %for.body ]
+ %cmp = icmp ule i32 %i.0, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = sext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ %cmp2 = icmp slt i32 0, %inc
+ br i1 %cmp2, label %for.cond, label %for.end
+
+for.end:
+ ret i32 %sum.0
+
+leave:
+ ret i32 0
+}
+
+define i32 @test9(i32* %a, i32 %b, i32 %init) {
+; CHECK-LABEL: @test9(
+; CHECK: [[INIT_ZEXT:%[a-z0-9]+]] = zext i32 %init to i64
+; CHECK: [[B_SEXT:%[a-z0-9]+]] = sext i32 %b to i64
+; CHECK: for.cond:
+; Note: %indvars.iv is the zero extension of %i.0
+; CHECK: %indvars.iv = phi i64 [ [[INIT_ZEXT]], %for.cond.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK: icmp slt i64 %indvars.iv, [[B_SEXT]]
+
+entry:
+ %e = icmp sgt i32 %init, 0
+ br i1 %e, label %for.cond, label %leave
+
+for.cond:
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %i.0 = phi i32 [ %init, %entry ], [ %inc, %for.body ]
+ %cmp = icmp slt i32 %i.0, %b
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %idxprom = zext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ %cmp2 = icmp slt i32 0, %inc
+ br i1 %cmp2, label %for.cond, label %for.end
+
+for.end:
+ ret i32 %sum.0
+
+leave:
+ ret i32 0
+}
+
+declare void @consume.i64(i64)
+declare void @consume.i1(i1)
+
+define i32 @test10(i32 %v) {
+; CHECK-LABEL: @test10(
+ entry:
+; CHECK-NOT: zext
+ br label %loop
+
+ loop:
+; CHECK: loop:
+; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ]
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: [[MUL:%[a-z0-9]+]] = mul nsw i64 %indvars.iv, -1
+; CHECK: [[MUL_TRUNC:%[a-z0-9]+]] = trunc i64 [[MUL]] to i32
+; CHECK: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[MUL_TRUNC]], %v
+; CHECK: call void @consume.i1(i1 [[CMP]])
+
+ %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ]
+ %i.inc = add i32 %i, 1
+ %iv = mul i32 %i, -1
+ %cmp = icmp eq i32 %iv, %v
+ call void @consume.i1(i1 %cmp)
+ %be.cond = icmp slt i32 %i.inc, 11
+ %ext = sext i32 %iv to i64
+ call void @consume.i64(i64 %ext)
+ br i1 %be.cond, label %loop, label %leave
+
+ leave:
+ ret i32 22
+}
diff --git a/test/Transforms/IndVarSimplify/zext-nuw.ll b/test/Transforms/IndVarSimplify/zext-nuw.ll
new file mode 100644
index 000000000000..13138de6a507
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/zext-nuw.ll
@@ -0,0 +1,49 @@
+; RUN: opt -indvars -S %s | FileCheck %s
+
+%struct.A = type { i8 }
+
+@c = global %struct.A* null
+@d = global i32 4
+
+define void @_Z3fn1v() {
+ %x2 = load i32, i32* @d
+ %x3 = icmp slt i32 %x2, 1
+ %x4 = select i1 %x3, i32 1, i32 %x2
+ %x5 = load %struct.A*, %struct.A** @c
+ %j.sroa.0.0..sroa_idx = getelementptr %struct.A, %struct.A* %x5, i64 0, i32 0
+ %j.sroa.0.0.copyload = load i8, i8* %j.sroa.0.0..sroa_idx
+ br label %.preheader4.lr.ph
+
+.preheader4.lr.ph: ; preds = %0
+ ; CHECK-NOT: add i64 {{.*}}, 4294967296
+ br label %.preheader4
+
+.preheader4: ; preds = %x22, %.preheader4.lr.ph
+ %k.09 = phi i8* [ undef, %.preheader4.lr.ph ], [ %x25, %x22 ]
+ %x8 = icmp ult i32 0, 4
+ br i1 %x8, label %.preheader.lr.ph, label %x22
+
+.preheader.lr.ph: ; preds = %.preheader4
+ br label %.preheader
+
+.preheader: ; preds = %x17, %.preheader.lr.ph
+ %k.17 = phi i8* [ %k.09, %.preheader.lr.ph ], [ %x19, %x17 ]
+ %v.06 = phi i32 [ 0, %.preheader.lr.ph ], [ %x20, %x17 ]
+ br label %x17
+
+x17: ; preds = %.preheader
+ %x18 = sext i8 %j.sroa.0.0.copyload to i64
+ %x19 = getelementptr i8, i8* %k.17, i64 %x18
+ %x20 = add i32 %v.06, 1
+ %x21 = icmp ult i32 %x20, %x4
+ br i1 %x21, label %.preheader, label %._crit_edge.8
+
+._crit_edge.8: ; preds = %x17
+ %split = phi i8* [ %x19, %x17 ]
+ br label %x22
+
+x22: ; preds = %._crit_edge.8, %.preheader4
+ %k.1.lcssa = phi i8* [ %split, %._crit_edge.8 ], [ %k.09, %.preheader4 ]
+ %x25 = getelementptr i8, i8* %k.1.lcssa
+ br label %.preheader4
+}
diff --git a/test/Transforms/FunctionAttrs/annotate-1.ll b/test/Transforms/InferFunctionAttrs/annotate.ll
index 9fba7a9f2882..963f484eb55e 100644
--- a/test/Transforms/FunctionAttrs/annotate-1.ll
+++ b/test/Transforms/InferFunctionAttrs/annotate.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -functionattrs -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -functionattrs -S | FileCheck -check-prefix=CHECK-POSIX %s
+; RUN: opt < %s -inferattrs -S | FileCheck %s
+; RUN: opt < %s -passes=inferattrs -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -inferattrs -S | FileCheck -check-prefix=CHECK-POSIX %s
declare i8* @fopen(i8*, i8*)
; CHECK: declare noalias i8* @fopen(i8* nocapture readonly, i8* nocapture readonly) [[G0:#[0-9]]]
diff --git a/test/Transforms/Inline/alloca-dbgdeclare-merge.ll b/test/Transforms/Inline/alloca-dbgdeclare-merge.ll
new file mode 100644
index 000000000000..5314f0b8397d
--- /dev/null
+++ b/test/Transforms/Inline/alloca-dbgdeclare-merge.ll
@@ -0,0 +1,102 @@
+; Test that alloca merging in the inliner places dbg.declare calls immediately
+; after the merged alloca. Not at the end of the entry BB, and definitely not
+; before the alloca.
+;
+; clang -g -S -emit-llvm -Xclang -disable-llvm-optzns
+;
+;__attribute__((always_inline)) void f() {
+; char aaa[100];
+; aaa[10] = 1;
+;}
+;
+;__attribute__((always_inline)) void g() {
+; char bbb[100];
+; bbb[20] = 1;
+;}
+;
+;void h() {
+; f();
+; g();
+;}
+;
+; RUN: opt -always-inline -S < %s | FileCheck %s
+;
+; CHECK: define void @h()
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %[[AI:.*]] = alloca [100 x i8]
+; CHECK-NEXT: call void @llvm.dbg.declare(metadata [100 x i8]* %[[AI]],
+; CHECK-NEXT: call void @llvm.dbg.declare(metadata [100 x i8]* %[[AI]],
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: alwaysinline nounwind uwtable
+define void @f() #0 !dbg !4 {
+entry:
+ %aaa = alloca [100 x i8], align 16
+ call void @llvm.dbg.declare(metadata [100 x i8]* %aaa, metadata !12, metadata !17), !dbg !18
+ %arrayidx = getelementptr inbounds [100 x i8], [100 x i8]* %aaa, i64 0, i64 10, !dbg !19
+ store i8 1, i8* %arrayidx, align 2, !dbg !20
+ ret void, !dbg !21
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: alwaysinline nounwind uwtable
+define void @g() #0 !dbg !7 {
+entry:
+ %bbb = alloca [100 x i8], align 16
+ call void @llvm.dbg.declare(metadata [100 x i8]* %bbb, metadata !22, metadata !17), !dbg !23
+ %arrayidx = getelementptr inbounds [100 x i8], [100 x i8]* %bbb, i64 0, i64 20, !dbg !24
+ store i8 1, i8* %arrayidx, align 4, !dbg !25
+ ret void, !dbg !26
+}
+
+; Function Attrs: nounwind uwtable
+define void @h() #2 !dbg !8 {
+entry:
+ call void @f(), !dbg !27
+ call void @g(), !dbg !28
+ ret void, !dbg !29
+}
+
+attributes #0 = { alwaysinline nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "../1.c", directory: "/code/llvm-git/build")
+!2 = !{}
+!3 = !{!4, !7, !8}
+!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = distinct !DISubprogram(name: "g", scope: !1, file: !1, line: 6, type: !5, isLocal: false, isDefinition: true, scopeLine: 6, isOptimized: false, variables: !2)
+!8 = distinct !DISubprogram(name: "h", scope: !1, file: !1, line: 11, type: !5, isLocal: false, isDefinition: true, scopeLine: 11, isOptimized: false, variables: !2)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)"}
+!12 = !DILocalVariable(name: "aaa", scope: !4, file: !1, line: 2, type: !13)
+!13 = !DICompositeType(tag: DW_TAG_array_type, baseType: !14, size: 800, align: 8, elements: !15)
+!14 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!15 = !{!16}
+!16 = !DISubrange(count: 100)
+!17 = !DIExpression()
+!18 = !DILocation(line: 2, column: 8, scope: !4)
+!19 = !DILocation(line: 3, column: 3, scope: !4)
+!20 = !DILocation(line: 3, column: 11, scope: !4)
+!21 = !DILocation(line: 4, column: 1, scope: !4)
+!22 = !DILocalVariable(name: "bbb", scope: !7, file: !1, line: 7, type: !13)
+!23 = !DILocation(line: 7, column: 8, scope: !7)
+!24 = !DILocation(line: 8, column: 3, scope: !7)
+!25 = !DILocation(line: 8, column: 11, scope: !7)
+!26 = !DILocation(line: 9, column: 1, scope: !7)
+!27 = !DILocation(line: 12, column: 3, scope: !8)
+!28 = !DILocation(line: 13, column: 3, scope: !8)
+!29 = !DILocation(line: 14, column: 1, scope: !8)
diff --git a/test/Transforms/Inline/alloca-dbgdeclare.ll b/test/Transforms/Inline/alloca-dbgdeclare.ll
index 286f2931ff22..39575311b4b4 100644
--- a/test/Transforms/Inline/alloca-dbgdeclare.ll
+++ b/test/Transforms/Inline/alloca-dbgdeclare.ll
@@ -34,7 +34,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
; Function Attrs: nounwind
-define void @_Z3fn4v() #0 {
+define void @_Z3fn4v() #0 !dbg !21 {
entry:
; Test that the dbg.declare is moved together with the alloca.
; CHECK: define void @_Z3fn5v()
@@ -61,7 +61,7 @@ _Z3fn31A.exit: ; preds = %entry, %if.then.i
}
; Function Attrs: noreturn nounwind
-define void @_Z3fn5v() #3 {
+define void @_Z3fn5v() #3 !dbg !24 {
entry:
br label %while.body, !dbg !55
@@ -82,7 +82,7 @@ attributes #3 = { noreturn nounwind }
!llvm.module.flags = !{!28, !29}
!llvm.ident = !{!30}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 227480) (llvm/trunk 227517)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !25, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 227480) (llvm/trunk 227517)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !25, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "")
!2 = !{}
!3 = !{!4}
@@ -97,16 +97,16 @@ attributes #3 = { noreturn nounwind }
!12 = !{!13}
!13 = !DISubrange(count: 2)
!14 = !{!15, !21, !24}
-!15 = !DISubprogram(name: "fn3", linkageName: "_Z3fn31A", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !5, scope: !16, type: !17, function: void (%struct.A*)* @_Z3fn31A, variables: !19)
+!15 = distinct !DISubprogram(name: "fn3", linkageName: "_Z3fn31A", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !5, scope: !16, type: !17, variables: !19)
!16 = !DIFile(filename: "test.cpp", directory: "")
!17 = !DISubroutineType(types: !18)
!18 = !{null, !"_ZTS1A"}
!19 = !{!20}
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !"_ZTS1A")
-!21 = !DISubprogram(name: "fn4", linkageName: "_Z3fn4v", line: 11, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !5, scope: !16, type: !22, function: void ()* @_Z3fn4v, variables: !2)
+!20 = !DILocalVariable(name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !"_ZTS1A")
+!21 = distinct !DISubprogram(name: "fn4", linkageName: "_Z3fn4v", line: 11, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !5, scope: !16, type: !22, variables: !2)
!22 = !DISubroutineType(types: !23)
!23 = !{null}
-!24 = !DISubprogram(name: "fn5", linkageName: "_Z3fn5v", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !5, scope: !16, type: !22, function: void ()* @_Z3fn5v, variables: !2)
+!24 = distinct !DISubprogram(name: "fn5", linkageName: "_Z3fn5v", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !5, scope: !16, type: !22, variables: !2)
!25 = !{!26, !27}
!26 = !DIGlobalVariable(name: "a", line: 4, isLocal: false, isDefinition: true, scope: null, file: !16, type: !"_ZTS1A", variable: %struct.A* @a)
!27 = !DIGlobalVariable(name: "b", line: 4, isLocal: false, isDefinition: true, scope: null, file: !16, type: !"_ZTS1A", variable: %struct.A* @b)
@@ -128,7 +128,7 @@ attributes #3 = { noreturn nounwind }
!43 = !{!37, !37, i64 0}
!44 = !{!38, !38, i64 0}
!45 = !DILocation(line: 9, scope: !15)
-!46 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !"_ZTS1A")
+!46 = !DILocalVariable(name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !"_ZTS1A")
!47 = distinct !DILocation(line: 11, scope: !21)
!48 = !DIExpression(DW_OP_bit_piece, 32, 160)
!49 = !DILocation(line: 6, scope: !15, inlinedAt: !47)
diff --git a/test/Transforms/Inline/debug-info-duplicate-calls.ll b/test/Transforms/Inline/debug-info-duplicate-calls.ll
index 4e3d9d92fdb3..442ff325863c 100644
--- a/test/Transforms/Inline/debug-info-duplicate-calls.ll
+++ b/test/Transforms/Inline/debug-info-duplicate-calls.ll
@@ -33,10 +33,10 @@
; CHECK: call void @_Z2f1v(), !dbg [[fcs2_f4_f3cs1_f2:![0-9]+]]
; CHECK: call void @_Z2f1v(), !dbg [[fcs2_f4_f3cs2_f2:![0-9]+]]
-; CHECK-DAG: [[F:![0-9]+]] = !DISubprogram(name: "f"
-; CHECK-DAG: [[F2:![0-9]+]] = !DISubprogram(name: "f2"
-; CHECK-DAG: [[F3:![0-9]+]] = !DISubprogram(name: "f3"
-; CHECK-DAG: [[F4:![0-9]+]] = !DISubprogram(name: "f4"
+; CHECK-DAG: [[F:![0-9]+]] = distinct !DISubprogram(name: "f"
+; CHECK-DAG: [[F2:![0-9]+]] = distinct !DISubprogram(name: "f2"
+; CHECK-DAG: [[F3:![0-9]+]] = distinct !DISubprogram(name: "f3"
+; CHECK-DAG: [[F4:![0-9]+]] = distinct !DISubprogram(name: "f4"
; CHECK: [[fcs1_f4_f3cs1_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs1_f4_f3cs1:![0-9]+]])
; CHECK: [[fcs1_f4_f3cs1]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs1_f4:![0-9]+]])
@@ -59,7 +59,7 @@ $_Z2f3v = comdat any
$_Z2f2v = comdat any
; Function Attrs: uwtable
-define void @_Z1fv() #0 {
+define void @_Z1fv() #0 !dbg !4 {
entry:
call void @_Z2f4v(), !dbg !13
call void @_Z2f4v(), !dbg !13
@@ -67,14 +67,14 @@ entry:
}
; Function Attrs: alwaysinline inlinehint uwtable
-define linkonce_odr void @_Z2f4v() #1 comdat {
+define linkonce_odr void @_Z2f4v() #1 comdat !dbg !7 {
entry:
call void @_Z2f3v(), !dbg !15
ret void, !dbg !16
}
; Function Attrs: alwaysinline inlinehint uwtable
-define linkonce_odr void @_Z2f3v() #1 comdat {
+define linkonce_odr void @_Z2f3v() #1 comdat !dbg !8 {
entry:
call void @_Z2f2v(), !dbg !17
call void @_Z2f2v(), !dbg !17
@@ -82,7 +82,7 @@ entry:
}
; Function Attrs: alwaysinline inlinehint uwtable
-define linkonce_odr void @_Z2f2v() #1 comdat {
+define linkonce_odr void @_Z2f2v() #1 comdat !dbg !9 {
entry:
call void @_Z2f1v(), !dbg !19
ret void, !dbg !20
@@ -98,16 +98,16 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 226474) (llvm/trunk 226478)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 226474) (llvm/trunk 226478)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "debug-info-duplicate-calls.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4, !7, !8, !9}
-!4 = !DISubprogram(name: "f", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !1, scope: !5, type: !6, function: void ()* @_Z1fv, variables: !2)
+!4 = distinct !DISubprogram(name: "f", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "debug-info-duplicate-calls.cpp", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !2)
-!7 = !DISubprogram(name: "f4", line: 10, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, function: void ()* @_Z2f4v, variables: !2)
-!8 = !DISubprogram(name: "f3", line: 7, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, function: void ()* @_Z2f3v, variables: !2)
-!9 = !DISubprogram(name: "f2", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, function: void ()* @_Z2f2v, variables: !2)
+!7 = distinct !DISubprogram(name: "f4", line: 10, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, variables: !2)
+!8 = distinct !DISubprogram(name: "f3", line: 7, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
+!9 = distinct !DISubprogram(name: "f2", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 2, !"Debug Info Version", i32 3}
!12 = !{!"clang version 3.7.0 (trunk 226474) (llvm/trunk 226478)"}
diff --git a/test/Transforms/Inline/debug-invoke.ll b/test/Transforms/Inline/debug-invoke.ll
index ca407acdd659..c547559d8c2a 100644
--- a/test/Transforms/Inline/debug-invoke.ll
+++ b/test/Transforms/Inline/debug-invoke.ll
@@ -4,7 +4,7 @@
; CHECK: invoke void @test()
; CHECK-NEXT: to label {{.*}} unwind label {{.*}}, !dbg [[INL_LOC:!.*]]
-; CHECK: [[SP:.*]] = !DISubprogram(
+; CHECK: [[SP:.*]] = distinct !DISubprogram(
; CHECK: [[INL_LOC]] = !DILocation(line: 1, scope: [[SP]], inlinedAt: [[INL_AT:.*]])
; CHECK: [[INL_AT]] = distinct !DILocation(line: 2, scope: [[SP]])
@@ -32,6 +32,6 @@ lpad:
!llvm.module.flags = !{!1}
!1 = !{i32 2, !"Debug Info Version", i32 3}
-!2 = !DISubprogram()
+!2 = distinct !DISubprogram()
!3 = !DILocation(line: 1, scope: !2)
!4 = !DILocation(line: 2, scope: !2)
diff --git a/test/Transforms/Inline/deopt-bundles.ll b/test/Transforms/Inline/deopt-bundles.ll
new file mode 100644
index 000000000000..3e3c52f7d2d5
--- /dev/null
+++ b/test/Transforms/Inline/deopt-bundles.ll
@@ -0,0 +1,203 @@
+; RUN: opt -S -always-inline < %s | FileCheck %s
+
+declare void @f()
+declare i32 @g()
+declare fastcc i32 @g.fastcc()
+
+define i32 @callee_0() alwaysinline {
+ entry:
+ call void @f()
+ ret i32 2
+}
+
+define i32 @caller_0() {
+; CHECK-LABEL: @caller_0(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT: call void @f()
+; CHECK-NEXT: ret i32 2
+ %x = call i32 @callee_0() [ "deopt"(i32 5) ]
+ ret i32 %x
+}
+
+define i32 @callee_1() alwaysinline {
+ entry:
+ call void @f() [ "deopt"() ]
+ call void @f() [ "deopt"(i32 0, i32 1) ]
+ call void @f() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 2
+}
+
+define i32 @caller_1() {
+; CHECK-LABEL: @caller_1(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT: call void @f() [ "deopt"(i32 5) ]
+; CHECK-NEXT: call void @f() [ "deopt"(i32 5, i32 0, i32 1) ]
+; CHECK-NEXT: call void @f() [ "deopt"(i32 5, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+; CHECK-NEXT: ret i32 2
+
+ %x = call i32 @callee_1() [ "deopt"(i32 5) ]
+ ret i32 %x
+}
+
+define i32 @callee_2() alwaysinline {
+ entry:
+ %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 %v
+}
+
+define i32 @caller_2(i32 %val) {
+; CHECK-LABEL: @caller_2(
+ entry:
+; CHECK: entry:
+; CHECK-NEXT: [[RVAL:%[^ ]+]] = call i32 @g() [ "deopt"(i32 %val, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+; CHECK-NEXT: ret i32 [[RVAL]]
+ %x = call i32 @callee_2() [ "deopt"(i32 %val) ]
+ ret i32 %x
+}
+
+define i32 @callee_3() alwaysinline {
+ entry:
+ %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 %v
+}
+
+define i32 @caller_3() personality i8 3 {
+; CHECK-LABEL: @caller_3(
+ entry:
+ %x = invoke i32 @callee_3() [ "deopt"(i32 7) ] to label %normal unwind label %unwind
+; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+
+ normal:
+ ret i32 %x
+
+ unwind:
+ %cleanup = landingpad i8 cleanup
+ ret i32 101
+}
+
+define i32 @callee_4() alwaysinline personality i8 3 {
+ entry:
+ %v = invoke i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] to label %normal unwind label %unwind
+
+ normal:
+ ret i32 %v
+
+ unwind:
+ %cleanup = landingpad i8 cleanup
+ ret i32 100
+}
+
+define i32 @caller_4() {
+; CHECK-LABEL: @caller_4(
+ entry:
+; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+ %x = call i32 @callee_4() [ "deopt"(i32 7) ]
+ ret i32 %x
+}
+
+define i32 @callee_5() alwaysinline personality i8 3 {
+ entry:
+ %v = invoke fastcc i32 @g.fastcc() #0 [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] to label %normal unwind label %unwind
+
+ normal:
+ ret i32 %v
+
+ unwind:
+ %cleanup = landingpad i8 cleanup
+ ret i32 100
+}
+
+define i32 @caller_5() {
+; CHECK-LABEL: @caller_5(
+ entry:
+; CHECK: invoke fastcc i32 @g.fastcc() #[[FOO_BAR_ATTR_IDX:[0-9]+]] [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+ %x = call i32 @callee_5() [ "deopt"(i32 7) ]
+ ret i32 %x
+}
+
+define i32 @callee_6() alwaysinline personality i8 3 {
+ entry:
+ %v = call fastcc i32 @g.fastcc() #0 [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 %v
+}
+
+define i32 @caller_6() {
+; CHECK-LABEL: @caller_6(
+ entry:
+; CHECK: call fastcc i32 @g.fastcc() #[[FOO_BAR_ATTR_IDX]] [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ]
+ %x = call i32 @callee_6() [ "deopt"(i32 7) ]
+ ret i32 %x
+}
+
+define i32 @callee_7(i1 %val) alwaysinline personality i8 3 {
+; We want something that PruningFunctionCloner is not smart enough to
+; recognize, but can be recognized by recursivelySimplifyInstruction.
+
+ entry:
+ br i1 %val, label %check, label %precheck
+
+ precheck:
+ br label %check
+
+ check:
+ %p = phi i1 [ %val, %entry ], [ true, %precheck ]
+ br i1 %p, label %do.not, label %do
+
+ do.not:
+ ret i32 0
+
+ do:
+ %v = call fastcc i32 @g.fastcc() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ ret i32 %v
+}
+
+define i32 @caller_7() {
+; CHECK-LABEL: @caller_7(
+ entry:
+; CHECK-NOT: call fastcc i32 @g.fastcc()
+; CHECK: ret i32 0
+ %x = call i32 @callee_7(i1 true) [ "deopt"(i32 7) ]
+ ret i32 %x
+}
+
+define i32 @callee_8(i1 %val) alwaysinline personality i8 3 {
+; We want something that PruningFunctionCloner is not smart enough to
+; recognize, but can be recognized by recursivelySimplifyInstruction.
+
+ entry:
+ br i1 %val, label %check, label %precheck
+
+ precheck:
+ br label %check
+
+ check:
+ %p = phi i1 [ %val, %entry ], [ true, %precheck ]
+ br i1 %p, label %do.not, label %do
+
+ do.not:
+ ret i32 0
+
+ do:
+ %phi = phi i32 [ 0, %check ], [ %v, %do ]
+ %v = call fastcc i32 @g.fastcc() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ]
+ %ic = icmp eq i32 %v, 42
+ br i1 %ic, label %do, label %done
+
+ done:
+ ret i32 %phi
+}
+
+define i32 @caller_8() {
+; CHECK-LABEL: @caller_8(
+ entry:
+; CHECK-NOT: call fastcc i32 @g.fastcc()
+; CHECK: ret i32 0
+ %x = call i32 @callee_8(i1 true) [ "deopt"(i32 7) ]
+ ret i32 %x
+}
+
+attributes #0 = { "foo"="bar" }
+
+; CHECK: attributes #[[FOO_BAR_ATTR_IDX]] = { "foo"="bar" }
diff --git a/test/Transforms/Inline/ignore-debug-info.ll b/test/Transforms/Inline/ignore-debug-info.ll
index eb92bc52cc94..f4f046846e82 100644
--- a/test/Transforms/Inline/ignore-debug-info.ll
+++ b/test/Transforms/Inline/ignore-debug-info.ll
@@ -12,11 +12,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
define <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b) {
entry:
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
%mul = fmul <4 x float> %a, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
%mul1 = fmul <4 x float> %b, <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
%add = fadd <4 x float> %mul, %mul1
ret <4 x float> %add
}
@@ -27,10 +27,10 @@ define float @outer_vectors(<4 x float> %a, <4 x float> %b) {
; CHECK: ret float
entry:
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
%call = call <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b)
- call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6)
%vecext = extractelement <4 x float> %call, i32 0
%vecext1 = extractelement <4 x float> %call, i32 1
%add = fadd float %vecext, %vecext1
@@ -47,10 +47,10 @@ attributes #0 = { nounwind readnone }
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !{!6}, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !{!6}, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 1, !"Debug Info Version", i32 3}
!5 = !{!""}
-!6 = !DISubprogram()
+!6 = distinct !DISubprogram()
diff --git a/test/Transforms/Inline/inline-assume.ll b/test/Transforms/Inline/inline-assume.ll
new file mode 100644
index 000000000000..4a7dc3edb22d
--- /dev/null
+++ b/test/Transforms/Inline/inline-assume.ll
@@ -0,0 +1,31 @@
+; RUN: opt -inline -S -o - < %s | FileCheck %s
+
+%0 = type opaque
+%struct.Foo = type { i32, %0* }
+
+; Test that we don't crash when inlining @bar (rdar://22521387).
+define void @foo(%struct.Foo* align 4 %a) {
+entry:
+ call fastcc void @bar(%struct.Foo* nonnull align 4 undef)
+
+; CHECK: call void @llvm.assume(i1 undef)
+; CHECK: unreachable
+
+ ret void
+}
+
+define fastcc void @bar(%struct.Foo* align 4 %a) {
+; CHECK-LABEL: @bar
+entry:
+ %b = getelementptr inbounds %struct.Foo, %struct.Foo* %a, i32 0, i32 1
+ br i1 undef, label %if.end, label %if.then.i.i
+
+if.then.i.i:
+ call void @llvm.assume(i1 undef)
+ unreachable
+
+if.end:
+ ret void
+}
+
+declare void @llvm.assume(i1)
diff --git a/test/Transforms/Inline/inline-cold-callee.ll b/test/Transforms/Inline/inline-cold-callee.ll
new file mode 100644
index 000000000000..1fd9f105db50
--- /dev/null
+++ b/test/Transforms/Inline/inline-cold-callee.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -inline -inlinecold-threshold=0 -S | FileCheck %s
+
+; This tests that a cold callee gets the (lower) inlinecold-threshold even without
+; a cold hint and does not get inlined because the cost exceeds the inlinecold-threshold.
+; A callee with an identical body does get inlined because its cost fits within the
+; inline-threshold.
+
+define i32 @callee1(i32 %x) !prof !1 {
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+
+ ret i32 %x3
+}
+
+define i32 @callee2(i32 %x) !prof !2 {
+; CHECK-LABEL: @callee2(
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+
+ ret i32 %x3
+}
+
+define i32 @caller2(i32 %y1) !prof !2 {
+; CHECK-LABEL: @caller2(
+; CHECK: call i32 @callee2
+; CHECK-NOT: call i32 @callee1
+; CHECK: ret i32 %x3.i
+ %y2 = call i32 @callee2(i32 %y1)
+ %y3 = call i32 @callee1(i32 %y2)
+ ret i32 %y3
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"MaxFunctionCount", i32 1000}
+!1 = !{!"function_entry_count", i64 100}
+!2 = !{!"function_entry_count", i64 1}
+
diff --git a/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll b/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll
new file mode 100644
index 000000000000..498a995ecd45
--- /dev/null
+++ b/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+
+target datalayout = "e-p3:32:32-p4:64:64-n32"
+
+@lds = internal addrspace(3) global [64 x i64] zeroinitializer
+
+; CHECK-LABEL: @constexpr_addrspacecast_ptr_size_change(
+; CHECK: load i64, i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* addrspacecast (i64 addrspace(3)* getelementptr inbounds ([64 x i64], [64 x i64] addrspace(3)* @lds, i32 0, i32 0) to i64 addrspace(4)*), i64 undef)
+; CHECK-NEXT: br
+define void @constexpr_addrspacecast_ptr_size_change() #0 {
+ %tmp0 = call i32 @foo(i64 addrspace(4)* addrspacecast (i64 addrspace(3)* getelementptr inbounds ([64 x i64], [64 x i64] addrspace(3)* @lds, i32 0, i32 0) to i64 addrspace(4)*)) #1
+ ret void
+}
+
+define i32 @foo(i64 addrspace(4)* %arg) #1 {
+bb:
+ %tmp = getelementptr i64, i64 addrspace(4)* %arg, i64 undef
+ %tmp1 = load i64, i64 addrspace(4)* %tmp
+ br i1 undef, label %bb2, label %bb3
+
+bb2:
+ store i64 0, i64 addrspace(4)* %tmp
+ br label %bb3
+
+bb3:
+ unreachable
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { alwaysinline nounwind }
diff --git a/test/Transforms/Inline/inline-hot-callee.ll b/test/Transforms/Inline/inline-hot-callee.ll
new file mode 100644
index 000000000000..93ea9d43c78d
--- /dev/null
+++ b/test/Transforms/Inline/inline-hot-callee.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -inline -inline-threshold=0 -inlinehint-threshold=100 -S | FileCheck %s
+
+; This tests that a hot callee gets the (higher) inlinehint-threshold even without
+; inline hints and gets inlined because the cost is less than the inlinehint-threshold.
+; A cold callee with an identical body does not get inlined because its cost exceeds
+; the inline-threshold.
+
+define i32 @callee1(i32 %x) !prof !1 {
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+
+ ret i32 %x3
+}
+
+define i32 @callee2(i32 %x) !prof !2 {
+; CHECK-LABEL: @callee2(
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+
+ ret i32 %x3
+}
+
+define i32 @caller2(i32 %y1) !prof !2 {
+; CHECK-LABEL: @caller2(
+; CHECK: call i32 @callee2
+; CHECK-NOT: call i32 @callee1
+; CHECK: ret i32 %x3.i
+ %y2 = call i32 @callee2(i32 %y1)
+ %y3 = call i32 @callee1(i32 %y2)
+ ret i32 %y3
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"MaxFunctionCount", i32 10}
+!1 = !{!"function_entry_count", i64 10}
+!2 = !{!"function_entry_count", i64 1}
+
diff --git a/test/Transforms/Inline/inline-optsize.ll b/test/Transforms/Inline/inline-optsize.ll
index 820e56f7f8ef..b01a1f657f31 100644
--- a/test/Transforms/Inline/inline-optsize.ll
+++ b/test/Transforms/Inline/inline-optsize.ll
@@ -3,7 +3,7 @@
; The inline threshold for a function with the optsize attribute is currently
; the same as the global inline threshold for -Os. Check that the optsize
-; function attribute don't alter the function specific inline threshold if the
+; function attribute doesn't alter the function-specific inline threshold if the
; global inline threshold is lower (as for -Oz).
@a = global i32 4
diff --git a/test/Transforms/Inline/inline_dbg_declare.ll b/test/Transforms/Inline/inline_dbg_declare.ll
index f3ad7ef8b8fc..3c701c41459a 100644
--- a/test/Transforms/Inline/inline_dbg_declare.ll
+++ b/test/Transforms/Inline/inline_dbg_declare.ll
@@ -23,7 +23,7 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
target triple = "i686-pc-windows-msvc"
; Function Attrs: nounwind
-define float @foo(float %x) #0 {
+define float @foo(float %x) #0 !dbg !4 {
entry:
%x.addr = alloca float, align 4
store float %x, float* %x.addr, align 4
@@ -38,10 +38,11 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
; CHECK: define void @bar
; Function Attrs: nounwind
-define void @bar(float* %dst) #0 {
+define void @bar(float* %dst) #0 !dbg !9 {
entry:
; CHECK: [[x_addr_i:%[a-zA-Z0-9.]+]] = alloca float, align 4
+; CHECK-NEXT: void @llvm.dbg.declare(metadata float* [[x_addr_i]], metadata [[m23:![0-9]+]], metadata !{{[0-9]+}}), !dbg [[m24:![0-9]+]]
%dst.addr = alloca float*, align 4
store float* %dst, float** %dst.addr, align 4
@@ -52,7 +53,6 @@ entry:
%call = call float @foo(float %1), !dbg !22
; CHECK-NOT: call float @foo
-; CHECK: void @llvm.dbg.declare(metadata float* [[x_addr_i]], metadata [[m23:![0-9]+]], metadata !{{[0-9]+}}), !dbg [[m24:![0-9]+]]
%2 = load float*, float** %dst.addr, align 4, !dbg !22
%arrayidx1 = getelementptr inbounds float, float* %2, i32 0, !dbg !22
@@ -67,33 +67,33 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!13, !14}
!llvm.ident = !{!15}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.c", directory: "")
!2 = !{}
!3 = !{!4, !9}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: float (float)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "foo.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
-!9 = !DISubprogram(name: "bar", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !10, function: void (float*)* @bar, variables: !2)
+!9 = distinct !DISubprogram(name: "bar", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !10, variables: !2)
!10 = !DISubroutineType(types: !11)
!11 = !{null, !12}
!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !8)
!13 = !{i32 2, !"Dwarf Version", i32 4}
!14 = !{i32 2, !"Debug Info Version", i32 3}
!15 = !{!"clang version 3.6.0 (trunk)"}
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 1, arg: 1, scope: !4, file: !5, type: !8)
+!16 = !DILocalVariable(name: "x", line: 1, arg: 1, scope: !4, file: !5, type: !8)
!17 = !DIExpression()
!18 = !DILocation(line: 1, column: 17, scope: !4)
!19 = !DILocation(line: 3, column: 5, scope: !4)
-!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "dst", line: 6, arg: 1, scope: !9, file: !5, type: !12)
+!20 = !DILocalVariable(name: "dst", line: 6, arg: 1, scope: !9, file: !5, type: !12)
!21 = !DILocation(line: 6, column: 17, scope: !9)
!22 = !DILocation(line: 8, column: 14, scope: !9)
!23 = !DILocation(line: 9, column: 1, scope: !9)
-; CHECK: [[FOO:![0-9]+]] = !DISubprogram(name: "foo",
-; CHECK: [[BAR:![0-9]+]] = !DISubprogram(name: "bar",
-; CHECK: [[m23]] = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1, scope: [[FOO]]
-; CHECK: [[CALL_SITE:![0-9]+]] = distinct !DILocation(line: 8, column: 14, scope: [[BAR]])
-; CHECK: [[m24]] = !DILocation(line: 1, column: 17, scope: [[FOO]], inlinedAt: [[CALL_SITE]])
+; CHECK: [[FOO:![0-9]+]] = distinct !DISubprogram(name: "foo",
+; CHECK: [[BAR:![0-9]+]] = distinct !DISubprogram(name: "bar",
+; CHECK: [[m23]] = !DILocalVariable(name: "x", arg: 1, scope: [[FOO]]
+; CHECK: [[m24]] = !DILocation(line: 1, column: 17, scope: [[FOO]], inlinedAt: [[CALL_SITE:![0-9]+]])
+; CHECK: [[CALL_SITE]] = distinct !DILocation(line: 8, column: 14, scope: [[BAR]])
diff --git a/test/Transforms/Inline/inline_invoke.ll b/test/Transforms/Inline/inline_invoke.ll
index 2ef216e2d38a..6784e16b1d87 100644
--- a/test/Transforms/Inline/inline_invoke.ll
+++ b/test/Transforms/Inline/inline_invoke.ll
@@ -344,4 +344,5 @@ terminate:
; CHECK: attributes [[NUW]] = { nounwind }
; CHECK: attributes #1 = { nounwind readnone }
; CHECK: attributes #2 = { ssp uwtable }
-; CHECK: attributes #3 = { noreturn nounwind }
+; CHECK: attributes #3 = { argmemonly nounwind }
+; CHECK: attributes #4 = { noreturn nounwind }
diff --git a/test/Transforms/Inline/noalias-calls.ll b/test/Transforms/Inline/noalias-calls.ll
index c09d2a673297..56d5c6dc0818 100644
--- a/test/Transforms/Inline/noalias-calls.ll
+++ b/test/Transforms/Inline/noalias-calls.ll
@@ -16,24 +16,25 @@ entry:
ret void
}
-define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 {
+define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 {
entry:
tail call void @hello(i8* %a, i8* %c, i8* %b)
ret void
}
-; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 {
+; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 {
; CHECK: entry:
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 false) #0, !noalias !0
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 false) #0, !noalias !3
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 false) #0, !alias.scope !5
-; CHECK: call void @hey() #0, !noalias !5
-; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %c, i64 16, i32 16, i1 false) #0, !noalias !3
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 false) #1, !noalias !0
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 false) #1, !noalias !3
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 false) #1, !alias.scope !5
+; CHECK: call void @hey() #1, !noalias !5
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %c, i64 16, i32 16, i1 false) #1, !noalias !3
; CHECK: ret void
; CHECK: }
-attributes #0 = { nounwind }
-attributes #1 = { nounwind uwtable }
+attributes #0 = { nounwind argmemonly }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind uwtable }
; CHECK: !0 = !{!1}
; CHECK: !1 = distinct !{!1, !2, !"hello: %c"}
diff --git a/test/Transforms/Inline/noalias-cs.ll b/test/Transforms/Inline/noalias-cs.ll
index 0bff1882e832..8528a391cf95 100644
--- a/test/Transforms/Inline/noalias-cs.ll
+++ b/test/Transforms/Inline/noalias-cs.ll
@@ -34,13 +34,13 @@ entry:
; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 7
; CHECK: store float %1, float* %arrayidx.i, align 4, !noalias !16
; CHECK: %2 = load float, float* %a, align 4, !alias.scope !16, !noalias !17
-; CHECK: %arrayidx.i.i.1 = getelementptr inbounds float, float* %b, i64 5
-; CHECK: store float %2, float* %arrayidx.i.i.1, align 4, !alias.scope !21, !noalias !22
-; CHECK: %arrayidx1.i.i.2 = getelementptr inbounds float, float* %b, i64 8
-; CHECK: store float %2, float* %arrayidx1.i.i.2, align 4, !alias.scope !23, !noalias !24
+; CHECK: %arrayidx.i.i1 = getelementptr inbounds float, float* %b, i64 5
+; CHECK: store float %2, float* %arrayidx.i.i1, align 4, !alias.scope !21, !noalias !22
+; CHECK: %arrayidx1.i.i2 = getelementptr inbounds float, float* %b, i64 8
+; CHECK: store float %2, float* %arrayidx1.i.i2, align 4, !alias.scope !23, !noalias !24
; CHECK: %3 = load float, float* %a, align 4, !alias.scope !16
-; CHECK: %arrayidx.i.3 = getelementptr inbounds float, float* %b, i64 7
-; CHECK: store float %3, float* %arrayidx.i.3, align 4, !alias.scope !16
+; CHECK: %arrayidx.i3 = getelementptr inbounds float, float* %b, i64 7
+; CHECK: store float %3, float* %arrayidx.i3, align 4, !alias.scope !16
; CHECK: ret void
; CHECK: }
diff --git a/test/Transforms/Inline/noalias2.ll b/test/Transforms/Inline/noalias2.ll
index df135b0a318a..432fccf431c0 100644
--- a/test/Transforms/Inline/noalias2.ll
+++ b/test/Transforms/Inline/noalias2.ll
@@ -61,8 +61,8 @@ entry:
; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 7
; CHECK: store float %1, float* %arrayidx.i, align 4, !alias.scope !14, !noalias !13
; CHECK: %2 = load float, float* %c, align 4, !noalias !15
-; CHECK: %arrayidx.i.1 = getelementptr inbounds float, float* %a, i64 6
-; CHECK: store float %2, float* %arrayidx.i.1, align 4, !alias.scope !19, !noalias !20
+; CHECK: %arrayidx.i1 = getelementptr inbounds float, float* %a, i64 6
+; CHECK: store float %2, float* %arrayidx.i1, align 4, !alias.scope !19, !noalias !20
; CHECK: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8
; CHECK: store float %2, float* %arrayidx1.i, align 4, !alias.scope !20, !noalias !19
; CHECK: %3 = load float, float* %c, align 4
diff --git a/test/Transforms/Inline/zero-cost.ll b/test/Transforms/Inline/zero-cost.ll
new file mode 100644
index 000000000000..8e7194a1963b
--- /dev/null
+++ b/test/Transforms/Inline/zero-cost.ll
@@ -0,0 +1,17 @@
+; RUN: opt -inline -S %s | FileCheck %s
+
+define void @f() {
+entry:
+ tail call void @g()
+ unreachable
+
+; CHECK-LABEL: @f
+; CHECK-NOT: call
+; CHECK: unreachable
+}
+
+define void @g() {
+entry:
+ unreachable
+}
+
diff --git a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
index 7f06f009515f..c303ddd58974 100644
--- a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
+++ b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -instcombine -S | grep icmp
; PR1646
-@__gthrw_pthread_cancel = weak alias i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=1]
+@__gthrw_pthread_cancel = weak alias i32 (i32), i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=1]
@__gthread_active_ptr.5335 = internal constant i8* bitcast (i32 (i32)* @__gthrw_pthread_cancel to i8*) ; <i8**> [#uses=1]
define weak i32 @pthread_cancel(i32) {
ret i32 0
diff --git a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
index 3793a860e8e9..7c6df1f984a4 100644
--- a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
+++ b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -instcombine -S | grep icmp
; PR1678
-@A = weak alias void ()* @B ; <void ()*> [#uses=1]
+@A = weak alias void (), void ()* @B ; <void ()*> [#uses=1]
define weak void @B() {
ret void
diff --git a/test/Transforms/InstCombine/LandingPadClauses.ll b/test/Transforms/InstCombine/LandingPadClauses.ll
index a4d77cbe8efb..75050c91bbb4 100644
--- a/test/Transforms/InstCombine/LandingPadClauses.ll
+++ b/test/Transforms/InstCombine/LandingPadClauses.ll
@@ -69,9 +69,11 @@ lpad.c:
filter [1 x i32*] [i32* @T1]
catch i32* @T2
unreachable
+; Caught types should not be removed from filters
; CHECK: %c = landingpad
-; CHECK-NEXT: @T1
-; CHECK-NEXT: filter [0 x i32*]
+; CHECK-NEXT: catch i32* @T1
+; CHECK-NEXT: filter [1 x i32*] [i32* @T1]
+; CHECK-NEXT: catch i32* @T2
; CHECK-NEXT: unreachable
lpad.d:
@@ -87,9 +89,10 @@ lpad.e:
catch i32* @T1
filter [3 x i32*] [i32* @T1, i32* @T2, i32* @T2]
unreachable
+; Caught types should not be removed from filters
; CHECK: %e = landingpad
-; CHECK-NEXT: @T1
-; CHECK-NEXT: filter [1 x i32*] [i32* @T2]
+; CHECK-NEXT: catch i32* @T1
+; CHECK-NEXT: filter [2 x i32*] [i32* @T1, i32* @T2]
; CHECK-NEXT: unreachable
lpad.f:
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll
index 1af2b0ffbf0a..2fe9e8cadeb7 100644
--- a/test/Transforms/InstCombine/add2.ll
+++ b/test/Transforms/InstCombine/add2.ll
@@ -395,3 +395,13 @@ define i8 @add_of_mul(i8 %x, i8 %y, i8 %z) {
%sum = add nsw i8 %mA, %mB
ret i8 %sum
}
+
+define i32 @add_of_selects(i1 %A, i32 %B) {
+ %sel0 = select i1 %A, i32 0, i32 -2
+ %sel1 = select i1 %A, i32 %B, i32 2
+ %add = add i32 %sel0, %sel1
+ ret i32 %add
+; CHECK-LABEL: @add_of_selects(
+; CHECK-NEXT: %[[sel:.*]] = select i1 %A, i32 %B, i32 0
+; CHECK-NEXT: ret i32 %[[sel]]
+}
diff --git a/test/Transforms/InstCombine/alias-recursion.ll b/test/Transforms/InstCombine/alias-recursion.ll
index 74254f3e8dff..efc1899e1f47 100644
--- a/test/Transforms/InstCombine/alias-recursion.ll
+++ b/test/Transforms/InstCombine/alias-recursion.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-pc-windows-msvc"
@0 = constant [1 x i8*] zeroinitializer
-@vtbl = alias getelementptr inbounds ([1 x i8*], [1 x i8*]* @0, i32 0, i32 0)
+@vtbl = alias i8*, getelementptr inbounds ([1 x i8*], [1 x i8*]* @0, i32 0, i32 0)
define i32 (%class.A*)* @test() {
; CHECK-LABEL: test
diff --git a/test/Transforms/InstCombine/all-bits-shift.ll b/test/Transforms/InstCombine/all-bits-shift.ll
new file mode 100644
index 000000000000..b9eb19cf2ad1
--- /dev/null
+++ b/test/Transforms/InstCombine/all-bits-shift.ll
@@ -0,0 +1,46 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@d = global i32 15, align 4
+@b = global i32* @d, align 8
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define signext i32 @main() #1 {
+entry:
+ %0 = load i32*, i32** @b, align 8
+ %1 = load i32, i32* @a, align 4
+ %lnot = icmp eq i32 %1, 0
+ %lnot.ext = zext i1 %lnot to i32
+ %shr.i = lshr i32 2072, %lnot.ext
+ %call.lobit = lshr i32 %shr.i, 7
+ %2 = and i32 %call.lobit, 1
+ %3 = load i32, i32* %0, align 4
+ %or = or i32 %2, %3
+ store i32 %or, i32* %0, align 4
+ %4 = load i32, i32* @a, align 4
+ %lnot.1 = icmp eq i32 %4, 0
+ %lnot.ext.1 = zext i1 %lnot.1 to i32
+ %shr.i.1 = lshr i32 2072, %lnot.ext.1
+ %call.lobit.1 = lshr i32 %shr.i.1, 7
+ %5 = and i32 %call.lobit.1, 1
+ %or.1 = or i32 %5, %or
+ store i32 %or.1, i32* %0, align 4
+ ret i32 %or.1
+
+; Check that both InstCombine and InstSimplify can use computeKnownBits to
+; realize that:
+; ((2072 >> (L == 0)) >> 7) & 1
+; is always zero.
+
+; CHECK-LABEL: @main
+; CHECK: %[[V1:[0-9]+]] = load i32*, i32** @b, align 8
+; CHECK: %[[V2:[0-9]+]] = load i32, i32* %[[V1]], align 4
+; CHECK: ret i32 %[[V2]]
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index b61b75e9f9f3..2ee0372e5e0a 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -163,3 +163,14 @@ entry:
call void (...) @use(i1* %v32, i1* %v64, i1* %v33)
ret void
}
+
+define void @test11() {
+entry:
+; ALL-LABEL: @test11(
+; ALL: %y = alloca i32
+; ALL: call void (...) @use(i32* nonnull @int) [ "blah"(i32* %y) ]
+; ALL: ret void
+ %y = alloca i32
+ call void (...) @use(i32* nonnull @int) [ "blah"(i32* %y) ]
+ ret void
+}
diff --git a/test/Transforms/InstCombine/and-compare.ll b/test/Transforms/InstCombine/and-compare.ll
index 037641b90ad7..53ea81d1c0d4 100644
--- a/test/Transforms/InstCombine/and-compare.ll
+++ b/test/Transforms/InstCombine/and-compare.ll
@@ -1,6 +1,9 @@
; RUN: opt < %s -instcombine -S | \
; RUN: FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
; Should be optimized to one and.
define i1 @test1(i32 %a, i32 %b) {
; CHECK-LABEL: @test1(
@@ -13,3 +16,23 @@ define i1 @test1(i32 %a, i32 %b) {
%tmp = icmp ne i32 %tmp1, %tmp3 ; <i1> [#uses=1]
ret i1 %tmp
}
+
+define zeroext i1 @test2(i64 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: %[[trunc:.*]] = trunc i64 %A to i8
+; CHECK-NEXT: %[[icmp:.*]] = icmp sgt i8 %[[trunc]], -1
+; CHECK-NEXT: ret i1 %[[icmp]]
+ %and = and i64 %A, 128
+ %cmp = icmp eq i64 %and, 0
+ ret i1 %cmp
+}
+
+define zeroext i1 @test3(i64 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: %[[trunc:.*]] = trunc i64 %A to i8
+; CHECK-NEXT: %[[icmp:.*]] = icmp slt i8 %[[trunc]], 0
+; CHECK-NEXT: ret i1 %[[icmp]]
+ %and = and i64 %A, 128
+ %cmp = icmp ne i64 %and, 0
+ ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index 96b535dda99d..326bfda38553 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -77,3 +77,71 @@ define i1 @test8(i32 %i) {
%cond = and i1 %cmp1, %cmp2
ret i1 %cond
}
+
+; combine -x & 1 into x & 1
+define i64 @test9(i64 %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NOT: %sub = sub nsw i64 0, %x
+; CHECK-NOT: %and = and i64 %sub, 1
+; CHECK-NEXT: %and = and i64 %x, 1
+; CHECK-NEXT: ret i64 %and
+ %sub = sub nsw i64 0, %x
+ %and = and i64 %sub, 1
+ ret i64 %and
+}
+
+define i64 @test10(i64 %x) {
+; CHECK-LABEL: @test10(
+; CHECK-NOT: %sub = sub nsw i64 0, %x
+; CHECK-NEXT: %and = and i64 %x, 1
+; CHECK-NOT: %add = add i64 %sub, %and
+; CHECK-NEXT: %add = sub i64 %and, %x
+; CHECK-NEXT: ret i64 %add
+ %sub = sub nsw i64 0, %x
+ %and = and i64 %sub, 1
+ %add = add i64 %sub, %and
+ ret i64 %add
+}
+
+define i64 @fabs_double(double %x) {
+; CHECK-LABEL: @fabs_double(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %and = bitcast double %fabs to i64
+; CHECK-NEXT: ret i64 %and
+ %bc = bitcast double %x to i64
+ %and = and i64 %bc, 9223372036854775807
+ ret i64 %and
+}
+
+define i64 @fabs_double_swap(double %x) {
+; CHECK-LABEL: @fabs_double_swap(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %and = bitcast double %fabs to i64
+; CHECK-NEXT: ret i64 %and
+ %bc = bitcast double %x to i64
+ %and = and i64 9223372036854775807, %bc
+ ret i64 %and
+}
+
+define i32 @fabs_float(float %x) {
+; CHECK-LABEL: @fabs_float(
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: %and = bitcast float %fabs to i32
+; CHECK-NEXT: ret i32 %and
+ %bc = bitcast float %x to i32
+ %and = and i32 %bc, 2147483647
+ ret i32 %and
+}
+
+; Make sure that only a bitcast is transformed.
+
+define i64 @fabs_double_not_bitcast(double %x) {
+; CHECK-LABEL: @fabs_double_not_bitcast(
+; CHECK-NEXT: %bc = fptoui double %x to i64
+; CHECK-NEXT: %and = and i64 %bc, 9223372036854775807
+; CHECK-NEXT: ret i64 %and
+ %bc = fptoui double %x to i64
+ %and = and i64 %bc, 9223372036854775807
+ ret i64 %and
+}
+
diff --git a/test/Transforms/InstCombine/apint-or.ll b/test/Transforms/InstCombine/apint-or.ll
new file mode 100644
index 000000000000..e2312b61f2b9
--- /dev/null
+++ b/test/Transforms/InstCombine/apint-or.ll
@@ -0,0 +1,79 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; These tests are for Integer BitWidth <= 64 && BitWidth % 2 != 0.
+define i23 @test1(i23 %A) {
+ ;; A | ~A == -1
+ %NotA = xor i23 -1, %A
+ %B = or i23 %A, %NotA
+ ret i23 %B
+; CHECK-LABEL: @test1
+; CHECK-NEXT: ret i23 -1
+}
+
+define i39 @test2(i39 %V, i39 %M) {
+ ;; If we have: ((V + N) & C1) | (V & C2)
+ ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ ;; replace with V+N.
+ %C1 = xor i39 274877906943, -1 ;; C2 = 274877906943
+ %N = and i39 %M, 274877906944
+ %A = add i39 %V, %N
+ %B = and i39 %A, %C1
+ %D = and i39 %V, 274877906943
+ %R = or i39 %B, %D
+ ret i39 %R
+; CHECK-LABEL: @test2
+; CHECK-NEXT: %N = and i39 %M, -274877906944
+; CHECK-NEXT: %A = add i39 %N, %V
+; CHECK-NEXT: ret i39 %A
+}
+
+define i43 @test3(i43 %A, i43 %B) {
+ ;; (~A | ~B) == (~(A & B)) - De Morgan's Law
+ %NotA = xor i43 %A, -1
+ %NotB = xor i43 %B, -1
+ %C1 = or i43 %NotA, %NotB
+ ret i43 %C1
+; CHECK-LABEL: @test3
+; CHECK-NEXT: %C1.demorgan = and i43 %A, %B
+; CHECK-NEXT: %C1 = xor i43 %C1.demorgan, -1
+; CHECK-NEXT: ret i43 %C1
+}
+
+; These tests are for Integer BitWidth > 64 && BitWidth <= 1024.
+define i1023 @test4(i1023 %A) {
+ ;; A | ~A == -1
+ %NotA = xor i1023 -1, %A
+ %B = or i1023 %A, %NotA
+ ret i1023 %B
+; CHECK-LABEL: @test4
+; CHECK-NEXT: ret i1023 -1
+}
+
+define i399 @test5(i399 %V, i399 %M) {
+ ;; If we have: ((V + N) & C1) | (V & C2)
+ ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ ;; replace with V+N.
+ %C1 = xor i399 274877906943, -1 ;; C2 = 274877906943
+ %N = and i399 %M, 18446742974197923840
+ %A = add i399 %V, %N
+ %B = and i399 %A, %C1
+ %D = and i399 %V, 274877906943
+ %R = or i399 %B, %D
+ ret i399 %R
+; CHECK-LABEL: @test5
+; CHECK-NEXT: %N = and i399 %M, 18446742974197923840
+; CHECK-NEXT: %A = add i399 %N, %V
+; CHECK-NEXT: ret i399 %A
+}
+
+define i129 @test6(i129 %A, i129 %B) {
+ ;; (~A | ~B) == (~(A & B)) - De Morgan's Law
+ %NotA = xor i129 %A, -1
+ %NotB = xor i129 %B, -1
+ %C1 = or i129 %NotA, %NotB
+ ret i129 %C1
+; CHECK-LABEL: @test6
+; CHECK-NEXT: %C1.demorgan = and i129 %A, %B
+; CHECK-NEXT: %C1 = xor i129 %C1.demorgan, -1
+; CHECK-NEXT: ret i129 %C1
+}
diff --git a/test/Transforms/InstCombine/apint-or1.ll b/test/Transforms/InstCombine/apint-or1.ll
deleted file mode 100644
index d4f87ac894d9..000000000000
--- a/test/Transforms/InstCombine/apint-or1.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; This test makes sure that or instructions are properly eliminated.
-; This test is for Integer BitWidth <= 64 && BitWidth % 2 != 0.
-;
-
-; RUN: opt < %s -instcombine -S | not grep or
-
-
-define i7 @test0(i7 %X) {
- %Y = or i7 %X, 0
- ret i7 %Y
-}
-
-define i17 @test1(i17 %X) {
- %Y = or i17 %X, -1
- ret i17 %Y
-}
-
-define i23 @test2(i23 %A) {
- ;; A | ~A == -1
- %NotA = xor i23 -1, %A
- %B = or i23 %A, %NotA
- ret i23 %B
-}
-
-define i39 @test3(i39 %V, i39 %M) {
- ;; If we have: ((V + N) & C1) | (V & C2)
- ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
- ;; replace with V+N.
- %C1 = xor i39 274877906943, -1 ;; C2 = 274877906943
- %N = and i39 %M, 274877906944
- %A = add i39 %V, %N
- %B = and i39 %A, %C1
- %D = and i39 %V, 274877906943
- %R = or i39 %B, %D
- ret i39 %R
-}
diff --git a/test/Transforms/InstCombine/apint-or2.ll b/test/Transforms/InstCombine/apint-or2.ll
deleted file mode 100644
index d7de255f7fd2..000000000000
--- a/test/Transforms/InstCombine/apint-or2.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; This test makes sure that or instructions are properly eliminated.
-; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
-;
-; RUN: opt < %s -instcombine -S | not grep or
-
-
-define i777 @test0(i777 %X) {
- %Y = or i777 %X, 0
- ret i777 %Y
-}
-
-define i117 @test1(i117 %X) {
- %Y = or i117 %X, -1
- ret i117 %Y
-}
-
-define i1023 @test2(i1023 %A) {
- ;; A | ~A == -1
- %NotA = xor i1023 -1, %A
- %B = or i1023 %A, %NotA
- ret i1023 %B
-}
-
-define i399 @test3(i399 %V, i399 %M) {
- ;; If we have: ((V + N) & C1) | (V & C2)
- ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
- ;; replace with V+N.
- %C1 = xor i399 274877906943, -1 ;; C2 = 274877906943
- %N = and i399 %M, 18446742974197923840
- %A = add i399 %V, %N
- %B = and i399 %A, %C1
- %D = and i399 %V, 274877906943
- %R = or i399 %B, %D
- ret i399 %R
-}
diff --git a/test/Transforms/InstCombine/assume-redundant.ll b/test/Transforms/InstCombine/assume-redundant.ll
index 4b869ef2c50e..4bdbcc8d086a 100644
--- a/test/Transforms/InstCombine/assume-redundant.ll
+++ b/test/Transforms/InstCombine/assume-redundant.ll
@@ -47,6 +47,32 @@ for.end: ; preds = %for.body
ret void
}
+declare align 8 i8* @get()
+
+; Check that redundant align assume is removed
+; CHECK-LABEL: @test
+; CHECK-NOT: call void @llvm.assume
+define void @test1() {
+ %p = call align 8 i8* @get()
+ %ptrint = ptrtoint i8* %p to i64
+ %maskedptr = and i64 %ptrint, 7
+ %maskcond = icmp eq i64 %maskedptr, 0
+ call void @llvm.assume(i1 %maskcond)
+ ret void
+}
+
+; Check that redundant align assume is removed
+; CHECK-LABEL: @test
+; CHECK-NOT: call void @llvm.assume
+define void @test3() {
+ %p = alloca i8, align 8
+ %ptrint = ptrtoint i8* %p to i64
+ %maskedptr = and i64 %ptrint, 7
+ %maskcond = icmp eq i64 %maskedptr, 0
+ call void @llvm.assume(i1 %maskcond)
+ ret void
+}
+
; Function Attrs: nounwind
declare void @llvm.assume(i1) #1
diff --git a/test/Transforms/InstCombine/bitcast-alias-function.ll b/test/Transforms/InstCombine/bitcast-alias-function.ll
index 1a598a5d4153..b04308e10e23 100644
--- a/test/Transforms/InstCombine/bitcast-alias-function.ll
+++ b/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -6,46 +6,46 @@ target datalayout = "e-p:32:32:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16
; Cases that should be bitcast
; Test cast between scalars with same bit sizes
-@alias_i32_to_f32 = alias bitcast (i32 (i32)* @func_i32 to float (float)*)
+@alias_i32_to_f32 = alias float (float), bitcast (i32 (i32)* @func_i32 to float (float)*)
; Test cast between vectors with same number of elements and bit sizes
-@alias_v2i32_to_v2f32 = alias bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <2 x float> (<2 x float>)*)
+@alias_v2i32_to_v2f32 = alias <2 x float> (<2 x float>), bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <2 x float> (<2 x float>)*)
; Test cast from vector to scalar with same number of bits
-@alias_v2f32_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <2 x float> (<2 x float>)*)
+@alias_v2f32_to_i64 = alias <2 x float> (<2 x float>), bitcast (i64 (i64)* @func_i64 to <2 x float> (<2 x float>)*)
; Test cast from scalar to vector with same number of bits
-@alias_i64_to_v2f32 = alias bitcast (<2 x float> (<2 x float>)* @func_v2f32 to i64 (i64)*)
+@alias_i64_to_v2f32 = alias i64 (i64), bitcast (<2 x float> (<2 x float>)* @func_v2f32 to i64 (i64)*)
; Test cast between vectors of pointers
-@alias_v2i32p_to_v2i64p = alias bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to <2 x i64*> (<2 x i64*>)*)
+@alias_v2i32p_to_v2i64p = alias <2 x i64*> (<2 x i64*>), bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to <2 x i64*> (<2 x i64*>)*)
; Cases that should be invalid and unchanged
; Test cast between scalars with different bit sizes
-@alias_i64_to_f32 = alias bitcast (i64 (i64)* @func_i64 to float (float)*)
+@alias_i64_to_f32 = alias float (float), bitcast (i64 (i64)* @func_i64 to float (float)*)
; Test cast between vectors with different bit sizes but the
; same number of elements
-@alias_v2i64_to_v2f32 = alias bitcast (<2 x i64> (<2 x i64>)* @func_v2i64 to <2 x float> (<2 x float>)*)
+@alias_v2i64_to_v2f32 = alias <2 x float> (<2 x float>), bitcast (<2 x i64> (<2 x i64>)* @func_v2i64 to <2 x float> (<2 x float>)*)
; Test cast between vectors with same number of bits and different
; numbers of elements
-@alias_v2i32_to_v4f32 = alias bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <4 x float> (<4 x float>)*)
+@alias_v2i32_to_v4f32 = alias <4 x float> (<4 x float>), bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <4 x float> (<4 x float>)*)
; Test cast between scalar and vector with different number of bits
-@alias_i64_to_v4f32 = alias bitcast (<4 x float> (<4 x float>)* @func_v4f32 to i64 (i64)*)
+@alias_i64_to_v4f32 = alias i64 (i64), bitcast (<4 x float> (<4 x float>)* @func_v4f32 to i64 (i64)*)
; Test cast between vector and scalar with different number of bits
-@alias_v4f32_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <4 x float> (<4 x float>)*)
+@alias_v4f32_to_i64 = alias <4 x float> (<4 x float>), bitcast (i64 (i64)* @func_i64 to <4 x float> (<4 x float>)*)
; Test cast from scalar to vector of pointers with same number of bits
; We don't know the pointer size at this point, so this can't be done
-@alias_i64_to_v2i32p = alias bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to i64 (i64)*)
+@alias_i64_to_v2i32p = alias i64 (i64), bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to i64 (i64)*)
; Test cast between vector of pointers and scalar with different number of bits
-@alias_v4i32p_to_i64 = alias bitcast (i64 (i64)* @func_i64 to <4 x i32*> (<4 x i32*>)*)
+@alias_v4i32p_to_i64 = alias <4 x i32*> (<4 x i32*>), bitcast (i64 (i64)* @func_i64 to <4 x i32*> (<4 x i32*>)*)
diff --git a/test/Transforms/InstCombine/bitcast-bitcast.ll b/test/Transforms/InstCombine/bitcast-bitcast.ll
new file mode 100644
index 000000000000..0f46ff53bc18
--- /dev/null
+++ b/test/Transforms/InstCombine/bitcast-bitcast.ll
@@ -0,0 +1,84 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check all scalar / vector combinations for a pair of bitcasts.
+
+define ppc_fp128 @bitcast_bitcast_s_s_s(i128 %a) {
+ %bc1 = bitcast i128 %a to fp128
+ %bc2 = bitcast fp128 %bc1 to ppc_fp128
+ ret ppc_fp128 %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_s_s(
+; CHECK-NEXT: %bc2 = bitcast i128 %a to ppc_fp128
+; CHECK-NEXT: ret ppc_fp128 %bc2
+}
+
+define <2 x i32> @bitcast_bitcast_s_s_v(i64 %a) {
+ %bc1 = bitcast i64 %a to double
+ %bc2 = bitcast double %bc1 to <2 x i32>
+ ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_s_v(
+; CHECK-NEXT: %bc2 = bitcast i64 %a to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %bc2
+}
+
+define double @bitcast_bitcast_s_v_s(i64 %a) {
+ %bc1 = bitcast i64 %a to <2 x i32>
+ %bc2 = bitcast <2 x i32> %bc1 to double
+ ret double %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_v_s(
+; CHECK-NEXT: %bc2 = bitcast i64 %a to double
+; CHECK-NEXT: ret double %bc2
+}
+
+define <2 x i32> @bitcast_bitcast_s_v_v(i64 %a) {
+ %bc1 = bitcast i64 %a to <4 x i16>
+ %bc2 = bitcast <4 x i16> %bc1 to <2 x i32>
+ ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_v_v(
+; CHECK-NEXT: %bc2 = bitcast i64 %a to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %bc2
+}
+
+define i64 @bitcast_bitcast_v_s_s(<2 x i32> %a) {
+ %bc1 = bitcast <2 x i32> %a to double
+ %bc2 = bitcast double %bc1 to i64
+ ret i64 %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_s_s(
+; CHECK-NEXT: %bc2 = bitcast <2 x i32> %a to i64
+; CHECK-NEXT: ret i64 %bc2
+}
+
+define <4 x i16> @bitcast_bitcast_v_s_v(<2 x i32> %a) {
+ %bc1 = bitcast <2 x i32> %a to double
+ %bc2 = bitcast double %bc1 to <4 x i16>
+ ret <4 x i16> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_s_v(
+; CHECK-NEXT: %bc2 = bitcast <2 x i32> %a to <4 x i16>
+; CHECK-NEXT: ret <4 x i16> %bc2
+}
+
+define double @bitcast_bitcast_v_v_s(<2 x float> %a) {
+ %bc1 = bitcast <2 x float> %a to <4 x i16>
+ %bc2 = bitcast <4 x i16> %bc1 to double
+ ret double %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_v_s(
+; CHECK-NEXT: %bc2 = bitcast <2 x float> %a to double
+; CHECK-NEXT: ret double %bc2
+}
+
+define <2 x i32> @bitcast_bitcast_v_v_v(<2 x float> %a) {
+ %bc1 = bitcast <2 x float> %a to <4 x i16>
+ %bc2 = bitcast <4 x i16> %bc1 to <2 x i32>
+ ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_v_v(
+; CHECK-NEXT: %bc2 = bitcast <2 x float> %a to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %bc2
+}
+
diff --git a/test/Transforms/InstCombine/bitcast-vec-canon.ll b/test/Transforms/InstCombine/bitcast-vec-canon.ll
index d27765e89424..97145221099e 100644
--- a/test/Transforms/InstCombine/bitcast-vec-canon.ll
+++ b/test/Transforms/InstCombine/bitcast-vec-canon.ll
@@ -1,22 +1,41 @@
-; RUN: opt < %s -instcombine -S | grep element | count 4
+; RUN: opt < %s -instcombine -S | FileCheck %s
define double @a(<1 x i64> %y) {
%c = bitcast <1 x i64> %y to double
- ret double %c
+ ret double %c
+
+; CHECK-LABEL: @a(
+; CHECK-NEXT: bitcast <1 x i64> %y to <1 x double>
+; CHECK-NEXT: extractelement <1 x double> {{.*}}, i32 0
+; CHECK-NEXT: ret double
}
define i64 @b(<1 x i64> %y) {
%c = bitcast <1 x i64> %y to i64
- ret i64 %c
+ ret i64 %c
+
+; CHECK-LABEL: @b(
+; CHECK-NEXT: extractelement <1 x i64> %y, i32 0
+; CHECK-NEXT: ret i64
}
define <1 x i64> @c(double %y) {
%c = bitcast double %y to <1 x i64>
ret <1 x i64> %c
+
+; CHECK-LABEL: @c(
+; CHECK-NEXT: bitcast double %y to i64
+; CHECK-NEXT: insertelement <1 x i64> undef, i64 {{.*}}, i32 0
+; CHECK-NEXT: ret <1 x i64>
}
define <1 x i64> @d(i64 %y) {
%c = bitcast i64 %y to <1 x i64>
ret <1 x i64> %c
+
+; CHECK-LABEL: @d(
+; CHECK-NEXT: insertelement <1 x i64> undef, i64 %y, i32 0
+; CHECK-NEXT: ret <1 x i64>
}
+
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 579839e4245b..bccd19cc32ea 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -64,6 +64,61 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
; CHECK-NEXT: ret float %add
}
+; Both bitcasts are unnecessary; change the extractelement.
+
+define float @bitcast_extelt1(<2 x float> %A) {
+ %bc1 = bitcast <2 x float> %A to <2 x i32>
+ %ext = extractelement <2 x i32> %bc1, i32 0
+ %bc2 = bitcast i32 %ext to float
+ ret float %bc2
+
+; CHECK-LABEL: @bitcast_extelt1(
+; CHECK-NEXT: %bc2 = extractelement <2 x float> %A, i32 0
+; CHECK-NEXT: ret float %bc2
+}
+
+; Second bitcast can be folded into the first.
+
+define i64 @bitcast_extelt2(<4 x float> %A) {
+ %bc1 = bitcast <4 x float> %A to <2 x double>
+ %ext = extractelement <2 x double> %bc1, i32 1
+ %bc2 = bitcast double %ext to i64
+ ret i64 %bc2
+
+; CHECK-LABEL: @bitcast_extelt2(
+; CHECK-NEXT: %bc = bitcast <4 x float> %A to <2 x i64>
+; CHECK-NEXT: %bc2 = extractelement <2 x i64> %bc, i32 1
+; CHECK-NEXT: ret i64 %bc2
+}
+
+; TODO: This should return %A.
+
+define <2 x i32> @bitcast_extelt3(<2 x i32> %A) {
+ %bc1 = bitcast <2 x i32> %A to <1 x i64>
+ %ext = extractelement <1 x i64> %bc1, i32 0
+ %bc2 = bitcast i64 %ext to <2 x i32>
+ ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_extelt3(
+; CHECK-NEXT: %bc1 = bitcast <2 x i32> %A to <1 x i64>
+; CHECK-NEXT: %ext = extractelement <1 x i64> %bc1, i32 0
+; CHECK-NEXT: %bc2 = bitcast i64 %ext to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %bc2
+}
+
+; Handle the case where the input is not a vector.
+
+define double @bitcast_extelt4(i128 %A) {
+ %bc1 = bitcast i128 %A to <2 x i64>
+ %ext = extractelement <2 x i64> %bc1, i32 0
+ %bc2 = bitcast i64 %ext to double
+ ret double %bc2
+
+; CHECK-LABEL: @bitcast_extelt4(
+; CHECK-NEXT: %bc = bitcast i128 %A to <2 x double>
+; CHECK-NEXT: %bc2 = extractelement <2 x double> %bc, i32 0
+; CHECK-NEXT: ret double %bc2
+}
define <2 x i32> @test4(i32 %A, i32 %B){
%tmp38 = zext i32 %A to i64
diff --git a/test/Transforms/InstCombine/bitreverse-fold.ll b/test/Transforms/InstCombine/bitreverse-fold.ll
new file mode 100644
index 000000000000..ad7fc3a74644
--- /dev/null
+++ b/test/Transforms/InstCombine/bitreverse-fold.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @test1(i32 %p) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT: ret i32 %p
+ %a = call i32 @llvm.bitreverse.i32(i32 %p)
+ %b = call i32 @llvm.bitreverse.i32(i32 %a)
+ ret i32 %b
+}
+
+declare i32 @llvm.bitreverse.i32(i32) readnone
diff --git a/test/Transforms/InstCombine/bitreverse-recognize.ll b/test/Transforms/InstCombine/bitreverse-recognize.ll
new file mode 100644
index 000000000000..fbd5cb6d139c
--- /dev/null
+++ b/test/Transforms/InstCombine/bitreverse-recognize.ll
@@ -0,0 +1,114 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+define zeroext i8 @f_u8(i8 zeroext %a) {
+; CHECK-LABEL: @f_u8
+; CHECK-NEXT: %[[A:.*]] = call i8 @llvm.bitreverse.i8(i8 %a)
+; CHECK-NEXT: ret i8 %[[A]]
+ %1 = shl i8 %a, 7
+ %2 = shl i8 %a, 5
+ %3 = and i8 %2, 64
+ %4 = shl i8 %a, 3
+ %5 = and i8 %4, 32
+ %6 = shl i8 %a, 1
+ %7 = and i8 %6, 16
+ %8 = lshr i8 %a, 1
+ %9 = and i8 %8, 8
+ %10 = lshr i8 %a, 3
+ %11 = and i8 %10, 4
+ %12 = lshr i8 %a, 5
+ %13 = and i8 %12, 2
+ %14 = lshr i8 %a, 7
+ %15 = or i8 %14, %1
+ %16 = or i8 %15, %3
+ %17 = or i8 %16, %5
+ %18 = or i8 %17, %7
+ %19 = or i8 %18, %9
+ %20 = or i8 %19, %11
+ %21 = or i8 %20, %13
+ ret i8 %21
+}
+
+; The ANDs with 32 and 64 have been swapped here, so the sequence does not
+; completely match a bitreverse.
+define zeroext i8 @f_u8_fail(i8 zeroext %a) {
+; CHECK-LABEL: @f_u8_fail
+; CHECK-NOT: call
+; CHECK: ret i8
+ %1 = shl i8 %a, 7
+ %2 = shl i8 %a, 5
+ %3 = and i8 %2, 32
+ %4 = shl i8 %a, 3
+ %5 = and i8 %4, 64
+ %6 = shl i8 %a, 1
+ %7 = and i8 %6, 16
+ %8 = lshr i8 %a, 1
+ %9 = and i8 %8, 8
+ %10 = lshr i8 %a, 3
+ %11 = and i8 %10, 4
+ %12 = lshr i8 %a, 5
+ %13 = and i8 %12, 2
+ %14 = lshr i8 %a, 7
+ %15 = or i8 %14, %1
+ %16 = or i8 %15, %3
+ %17 = or i8 %16, %5
+ %18 = or i8 %17, %7
+ %19 = or i8 %18, %9
+ %20 = or i8 %19, %11
+ %21 = or i8 %20, %13
+ ret i8 %21
+}
+
+define zeroext i16 @f_u16(i16 zeroext %a) {
+; CHECK-LABEL: @f_u16
+; CHECK-NEXT: %[[A:.*]] = call i16 @llvm.bitreverse.i16(i16 %a)
+; CHECK-NEXT: ret i16 %[[A]]
+ %1 = shl i16 %a, 15
+ %2 = shl i16 %a, 13
+ %3 = and i16 %2, 16384
+ %4 = shl i16 %a, 11
+ %5 = and i16 %4, 8192
+ %6 = shl i16 %a, 9
+ %7 = and i16 %6, 4096
+ %8 = shl i16 %a, 7
+ %9 = and i16 %8, 2048
+ %10 = shl i16 %a, 5
+ %11 = and i16 %10, 1024
+ %12 = shl i16 %a, 3
+ %13 = and i16 %12, 512
+ %14 = shl i16 %a, 1
+ %15 = and i16 %14, 256
+ %16 = lshr i16 %a, 1
+ %17 = and i16 %16, 128
+ %18 = lshr i16 %a, 3
+ %19 = and i16 %18, 64
+ %20 = lshr i16 %a, 5
+ %21 = and i16 %20, 32
+ %22 = lshr i16 %a, 7
+ %23 = and i16 %22, 16
+ %24 = lshr i16 %a, 9
+ %25 = and i16 %24, 8
+ %26 = lshr i16 %a, 11
+ %27 = and i16 %26, 4
+ %28 = lshr i16 %a, 13
+ %29 = and i16 %28, 2
+ %30 = lshr i16 %a, 15
+ %31 = or i16 %30, %1
+ %32 = or i16 %31, %3
+ %33 = or i16 %32, %5
+ %34 = or i16 %33, %7
+ %35 = or i16 %34, %9
+ %36 = or i16 %35, %11
+ %37 = or i16 %36, %13
+ %38 = or i16 %37, %15
+ %39 = or i16 %38, %17
+ %40 = or i16 %39, %19
+ %41 = or i16 %40, %21
+ %42 = or i16 %41, %23
+ %43 = or i16 %42, %25
+ %44 = or i16 %43, %27
+ %45 = or i16 %44, %29
+ ret i16 %45
+}
\ No newline at end of file
diff --git a/test/Transforms/InstCombine/blend_x86.ll b/test/Transforms/InstCombine/blend_x86.ll
index 778d44ba342c..eb0b8d7584ab 100644
--- a/test/Transforms/InstCombine/blend_x86.ll
+++ b/test/Transforms/InstCombine/blend_x86.ll
@@ -2,42 +2,118 @@
define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd
-; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x double> %ab, <2 x double> %xy
+; CHECK-NEXT: %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %ab, <2 x double> %xy
+; CHECK-NEXT: ret <2 x double> %1
%1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00>)
ret <2 x double> %1
}
+define <2 x double> @constant_blendvpd_zero(<2 x double> %xy, <2 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_zero
+; CHECK-NEXT: ret <2 x double> %xy
+ %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> zeroinitializer)
+ ret <2 x double> %1
+}
+
+define <2 x double> @constant_blendvpd_dup(<2 x double> %xy, <2 x double> %sel) {
+; CHECK-LABEL: @constant_blendvpd_dup
+; CHECK-NEXT: ret <2 x double> %xy
+ %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %xy, <2 x double> %sel)
+ ret <2 x double> %1
+}
+
define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps
-; CHECK: select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %abcd, <4 x float> %xyzw
+; CHECK-NEXT: %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %abcd, <4 x float> %xyzw
+; CHECK-NEXT: ret <4 x float> %1
%1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
ret <4 x float> %1
}
+define <4 x float> @constant_blendvps_zero(<4 x float> %xyzw, <4 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_zero
+; CHECK-NEXT: ret <4 x float> %xyzw
+ %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> zeroinitializer)
+ ret <4 x float> %1
+}
+
+define <4 x float> @constant_blendvps_dup(<4 x float> %xyzw, <4 x float> %sel) {
+; CHECK-LABEL: @constant_blendvps_dup
+; CHECK-NEXT: ret <4 x float> %xyzw
+ %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %xyzw, <4 x float> %sel)
+ ret <4 x float> %1
+}
+
define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb
-; CHECK: select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %abcd, <16 x i8> %xyzw
+; CHECK-NEXT: %1 = select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %abcd, <16 x i8> %xyzw
+; CHECK-NEXT: ret <16 x i8> %1
%1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
ret <16 x i8> %1
}
+define <16 x i8> @constant_pblendvb_zero(<16 x i8> %xyzw, <16 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_zero
+; CHECK-NEXT: ret <16 x i8> %xyzw
+ %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> zeroinitializer)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @constant_pblendvb_dup(<16 x i8> %xyzw, <16 x i8> %sel) {
+; CHECK-LABEL: @constant_pblendvb_dup
+; CHECK-NEXT: ret <16 x i8> %xyzw
+ %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %xyzw, <16 x i8> %sel)
+ ret <16 x i8> %1
+}
+
define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; CHECK-LABEL: @constant_blendvpd_avx
-; CHECK: select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %ab, <4 x double> %xy
+; CHECK-NEXT: %1 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %ab, <4 x double> %xy
+; CHECK-NEXT: ret <4 x double> %1
%1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00, double 0xFFFFFFFFE0000000, double 0.000000e+00>)
ret <4 x double> %1
}
+define <4 x double> @constant_blendvpd_avx_zero(<4 x double> %xy, <4 x double> %ab) {
+; CHECK-LABEL: @constant_blendvpd_avx_zero
+; CHECK-NEXT: ret <4 x double> %xy
+ %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> zeroinitializer)
+ ret <4 x double> %1
+}
+
+define <4 x double> @constant_blendvpd_avx_dup(<4 x double> %xy, <4 x double> %sel) {
+; CHECK-LABEL: @constant_blendvpd_avx_dup
+; CHECK-NEXT: ret <4 x double> %xy
+ %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %xy, <4 x double> %sel)
+ ret <4 x double> %1
+}
+
define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; CHECK-LABEL: @constant_blendvps_avx
-; CHECK: select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %abcd, <8 x float> %xyzw
+; CHECK-NEXT: %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %abcd, <8 x float> %xyzw
+; CHECK-NEXT: ret <8 x float> %1
%1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
ret <8 x float> %1
}
+define <8 x float> @constant_blendvps_avx_zero(<8 x float> %xyzw, <8 x float> %abcd) {
+; CHECK-LABEL: @constant_blendvps_avx_zero
+; CHECK-NEXT: ret <8 x float> %xyzw
+ %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> zeroinitializer)
+ ret <8 x float> %1
+}
+
+define <8 x float> @constant_blendvps_avx_dup(<8 x float> %xyzw, <8 x float> %sel) {
+; CHECK-LABEL: @constant_blendvps_avx_dup
+; CHECK-NEXT: ret <8 x float> %xyzw
+ %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %xyzw, <8 x float> %sel)
+ ret <8 x float> %1
+}
+
define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; CHECK-LABEL: @constant_pblendvb_avx2
-; CHECK: select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %abcd, <32 x i8> %xyzw
+; CHECK-NEXT: %1 = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %abcd, <32 x i8> %xyzw
+; CHECK-NEXT: ret <32 x i8> %1
%1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd,
<32 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
@@ -46,6 +122,20 @@ define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
ret <32 x i8> %1
}
+define <32 x i8> @constant_pblendvb_avx2_zero(<32 x i8> %xyzw, <32 x i8> %abcd) {
+; CHECK-LABEL: @constant_pblendvb_avx2_zero
+; CHECK-NEXT: ret <32 x i8> %xyzw
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> zeroinitializer)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @constant_pblendvb_avx2_dup(<32 x i8> %xyzw, <32 x i8> %sel) {
+; CHECK-LABEL: @constant_pblendvb_avx2_dup
+; CHECK-NEXT: ret <32 x i8> %xyzw
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %xyzw, <32 x i8> %sel)
+ ret <32 x i8> %1
+}
+
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
diff --git a/test/Transforms/InstCombine/bswap-fold.ll b/test/Transforms/InstCombine/bswap-fold.ll
index 63b0775e4aff..edf9572f1e11 100644
--- a/test/Transforms/InstCombine/bswap-fold.ll
+++ b/test/Transforms/InstCombine/bswap-fold.ll
@@ -51,7 +51,7 @@ define i32 @test5(i32 %a) nounwind {
define i32 @test6(i32 %a) nounwind {
; CHECK-LABEL: @test6
; CHECK-NEXT: %tmp2 = lshr i32 %a, 24
-; CHECK-NEXT ret i32 %tmp4
+; CHECK-NEXT: ret i32 %tmp2
%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
%tmp4 = and i32 %tmp2, 255
ret i32 %tmp4
@@ -62,7 +62,7 @@ define i16 @test7(i32 %A) {
; CHECK-LABEL: @test7
; CHECK-NEXT: %1 = lshr i32 %A, 16
; CHECK-NEXT: %D = trunc i32 %1 to i16
-; CHECK-NEXT ret i16 %D
+; CHECK-NEXT: ret i16 %D
%B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind
%C = trunc i32 %B to i16
%D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
@@ -73,7 +73,7 @@ define i16 @test8(i64 %A) {
; CHECK-LABEL: @test8
; CHECK-NEXT: %1 = lshr i64 %A, 48
; CHECK-NEXT: %D = trunc i64 %1 to i16
-; CHECK-NEXT ret i16 %D
+; CHECK-NEXT: ret i16 %D
%B = tail call i64 @llvm.bswap.i64(i64 %A) nounwind
%C = trunc i64 %B to i16
%D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
diff --git a/test/Transforms/InstCombine/bswap-known-bits.ll b/test/Transforms/InstCombine/bswap-known-bits.ll
new file mode 100644
index 000000000000..1f3285af65cc
--- /dev/null
+++ b/test/Transforms/InstCombine/bswap-known-bits.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+; Note: This is testing functionality in computeKnownBits. I'd have rather
+; used instsimplify, but the bit test folding is apparently only in instcombine.
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+
+define i1 @test1(i16 %arg) {
+; CHECK-LABEL: @test1
+; CHECK: ret i1 true
+ %a = or i16 %arg, 511
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 256
+ %res = icmp eq i16 %and, 256
+ ret i1 %res
+}
+
+define i1 @test2(i16 %arg) {
+; CHECK-LABEL: @test2
+; CHECK: ret i1 true
+ %a = or i16 %arg, 1
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 256
+ %res = icmp eq i16 %and, 256
+ ret i1 %res
+}
+
+
+define i1 @test3(i16 %arg) {
+; CHECK-LABEL: @test3
+; CHECK: ret i1 true
+ %a = or i16 %arg, 256
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 1
+ %res = icmp eq i16 %and, 1
+ ret i1 %res
+}
+
+define i1 @test4(i32 %arg) {
+; CHECK-LABEL: @test4
+; CHECK: ret i1 true
+ %a = or i32 %arg, 2147483647 ; i32_MAX
+ %b = call i32 @llvm.bswap.i32(i32 %a)
+ %and = and i32 %b, 127
+ %res = icmp eq i32 %and, 127
+ ret i1 %res
+}
diff --git a/test/Transforms/InstCombine/bswap.ll b/test/Transforms/InstCombine/bswap.ll
index ba7df3125f4e..b48b2a57c8ce 100644
--- a/test/Transforms/InstCombine/bswap.ll
+++ b/test/Transforms/InstCombine/bswap.ll
@@ -1,7 +1,7 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
; RUN: opt < %s -instcombine -S | \
-; RUN: grep "call.*llvm.bswap" | count 6
+; RUN: grep "call.*llvm.bswap" | count 7
define i32 @test1(i32 %i) {
%tmp1 = lshr i32 %i, 24 ; <i32> [#uses=1]
@@ -72,3 +72,15 @@ define i32 @test6(i32 %x) nounwind readnone {
ret i32 %tmp7
}
+; PR23863
+define i32 @test7(i32 %x) {
+ %shl = shl i32 %x, 16
+ %shr = lshr i32 %x, 16
+ %or = or i32 %shl, %shr
+ %and2 = shl i32 %or, 8
+ %shl3 = and i32 %and2, -16711936
+ %and4 = lshr i32 %or, 8
+ %shr5 = and i32 %and4, 16711935
+ %or6 = or i32 %shl3, %shr5
+ ret i32 %or6
+}
diff --git a/test/Transforms/InstCombine/call_nonnull_arg.ll b/test/Transforms/InstCombine/call_nonnull_arg.ll
new file mode 100644
index 000000000000..b10411f622be
--- /dev/null
+++ b/test/Transforms/InstCombine/call_nonnull_arg.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; InstCombine should mark null-checked argument as nonnull at callsite
+declare void @dummy(i32*, i32)
+
+define void @test(i32* %a, i32 %b) {
+; CHECK-LABEL: @test
+; CHECK: call void @dummy(i32* nonnull %a, i32 %b)
+entry:
+ %cond1 = icmp eq i32* %a, null
+ br i1 %cond1, label %dead, label %not_null
+not_null:
+ %cond2 = icmp eq i32 %b, 0
+ br i1 %cond2, label %dead, label %not_zero
+not_zero:
+ call void @dummy(i32* %a, i32 %b)
+ ret void
+dead:
+ unreachable
+}
diff --git a/test/Transforms/InstCombine/cast-callee-deopt-bundles.ll b/test/Transforms/InstCombine/cast-callee-deopt-bundles.ll
new file mode 100644
index 000000000000..0f8601b855cf
--- /dev/null
+++ b/test/Transforms/InstCombine/cast-callee-deopt-bundles.ll
@@ -0,0 +1,11 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @foo(i32)
+
+define void @g() {
+; CHECK-LABEL: @g(
+ entry:
+; CHECK: call void @foo(i32 0) [ "deopt"() ]
+ call void bitcast (void (i32)* @foo to void ()*) () [ "deopt"() ]
+ ret void
+}
diff --git a/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll b/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
index 551d0efce5ea..2e87a7d78020 100644
--- a/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
+++ b/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
@@ -10,8 +10,8 @@ define i1 @i32_cast_cmp_oeq_int_0_uitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_oeq_int_n0_uitofp(
-; CHECK: uitofp
-; CHECK: fcmp oeq
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_oeq_int_n0_uitofp(i32 %i) {
%f = uitofp i32 %i to float
%cmp = fcmp oeq float %f, -0.0
@@ -28,8 +28,8 @@ define i1 @i32_cast_cmp_oeq_int_0_sitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_oeq_int_n0_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp oeq
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_oeq_int_n0_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp oeq float %f, -0.0
@@ -46,8 +46,8 @@ define i1 @i32_cast_cmp_one_int_0_uitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_one_int_n0_uitofp(
-; CHECK: uitofp
-; CHECK: fcmp one
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_one_int_n0_uitofp(i32 %i) {
%f = uitofp i32 %i to float
%cmp = fcmp one float %f, -0.0
@@ -64,8 +64,8 @@ define i1 @i32_cast_cmp_one_int_0_sitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_one_int_n0_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp one
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_one_int_n0_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp one float %f, -0.0
@@ -82,8 +82,8 @@ define i1 @i32_cast_cmp_ueq_int_0_uitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_ueq_int_n0_uitofp(
-; CHECK: uitofp
-; CHECK: fcmp ueq
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_ueq_int_n0_uitofp(i32 %i) {
%f = uitofp i32 %i to float
%cmp = fcmp ueq float %f, -0.0
@@ -100,8 +100,8 @@ define i1 @i32_cast_cmp_ueq_int_0_sitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_ueq_int_n0_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp ueq
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_ueq_int_n0_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp ueq float %f, -0.0
@@ -118,8 +118,8 @@ define i1 @i32_cast_cmp_une_int_0_uitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_une_int_n0_uitofp(
-; CHECK: uitofp
-; CHECK: fcmp une
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_une_int_n0_uitofp(i32 %i) {
%f = uitofp i32 %i to float
%cmp = fcmp une float %f, -0.0
@@ -136,8 +136,8 @@ define i1 @i32_cast_cmp_une_int_0_sitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_une_int_n0_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp une
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_une_int_n0_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp une float %f, -0.0
@@ -154,8 +154,8 @@ define i1 @i32_cast_cmp_ogt_int_0_uitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_ogt_int_n0_uitofp(
-; CHECK: uitofp
-; CHECK: fcmp ogt
+; CHECK: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_ogt_int_n0_uitofp(i32 %i) {
%f = uitofp i32 %i to float
%cmp = fcmp ogt float %f, -0.0
@@ -172,8 +172,8 @@ define i1 @i32_cast_cmp_ogt_int_0_sitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_ogt_int_n0_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp ogt
+; CHECK: icmp sgt i32 %i, 0
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_ogt_int_n0_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp ogt float %f, -0.0
@@ -261,12 +261,13 @@ define i1 @i32_cast_cmp_oeq_int_0_uitofp_ppcf128(i32 %i) {
ret i1 %cmp
}
-; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24max_uitofp(
-; CHECK: uitofp
-; CHECK: fcmp oeq
+; Since 0xFFFFFF fits in a float, and one less and
+; one more than it also fits without rounding, the
+; test can be optimized to an integer compare.
-; XCHECK: icmp eq i32 %i, 16777215
-; XCHECK-NEXT: ret
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24max_uitofp(
+; CHECK: icmp eq i32 %i, 16777215
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_oeq_int_i24max_uitofp(i32 %i) {
%f = uitofp i32 %i to float
%cmp = fcmp oeq float %f, 0x416FFFFFE0000000
@@ -274,17 +275,18 @@ define i1 @i32_cast_cmp_oeq_int_i24max_uitofp(i32 %i) {
}
; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24max_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp oeq
-
-; XCHECK: icmp eq i32 %i, 16777215
-; XCHECK-NEXT: ret
+; CHECK: icmp eq i32 %i, 16777215
+; CHECK-NEXT: ret
define i1 @i32_cast_cmp_oeq_int_i24max_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp oeq float %f, 0x416FFFFFE0000000
ret i1 %cmp
}
+; Though 0x1000000 fits in a float, one more than it
+; would round to it too, hence a single integer comparison
+; does not suffice.
+
; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24maxp1_uitofp(
; CHECK: uitofp
; CHECK: fcmp oeq
@@ -319,10 +321,18 @@ define i1 @i32_cast_cmp_oeq_int_i32umax_uitofp(i32 %i) {
ret i1 %cmp
}
+; 32-bit unsigned integer cannot possibly round up to 1<<33
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_big_uitofp(
+; CHECK-NEXT: ret i1 false
+define i1 @i32_cast_cmp_oeq_int_big_uitofp(i32 %i) {
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x4200000000000000
+ ret i1 %cmp
+}
+
+; 32-bit signed integer cannot possibly round up to 1<<32
; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32umax_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp oeq
-; CHECK-NEXT: ret
+; CHECK-NEXT: ret i1 false
define i1 @i32_cast_cmp_oeq_int_i32umax_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp oeq float %f, 0x41F0000000000000
@@ -379,10 +389,9 @@ define i1 @i32_cast_cmp_oeq_int_negi32umax_uitofp(i32 %i) {
ret i1 %cmp
}
+; 32-bit signed integer cannot possibly round to -1<<32
; CHECK-LABEL: @i32_cast_cmp_oeq_int_negi32umax_sitofp(
-; CHECK: sitofp
-; CHECK: fcmp oeq
-; CHECK-NEXT: ret
+; CHECK-NEXT: ret i1 false
define i1 @i32_cast_cmp_oeq_int_negi32umax_sitofp(i32 %i) {
%f = sitofp i32 %i to float
%cmp = fcmp oeq float %f, 0xC1F0000000000000
@@ -452,3 +461,30 @@ define i1 @i32_cast_cmp_une_half_sitofp(i32 %i) {
%cmp = fcmp une float %f, 0.5
ret i1 %cmp
}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_inf_uitofp(
+; CHECK-NEXT: ret i1 false
+define i1 @i32_cast_cmp_oeq_int_inf_uitofp(i32 %i) {
+ %f = uitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x7FF0000000000000
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_inf_sitofp(
+; CHECK-NEXT: ret i1 false
+define i1 @i32_cast_cmp_oeq_int_inf_sitofp(i32 %i) {
+ %f = sitofp i32 %i to float
+ %cmp = fcmp oeq float %f, 0x7FF0000000000000
+ ret i1 %cmp
+}
+
+; An i128 could round to an IEEE single-precision infinity.
+; CHECK-LABEL: @i128_cast_cmp_oeq_int_inf_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i128_cast_cmp_oeq_int_inf_uitofp(i128 %i) {
+ %f = uitofp i128 %i to float
+ %cmp = fcmp oeq float %f, 0x7FF0000000000000
+ ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/cast-set.ll b/test/Transforms/InstCombine/cast-set.ll
index 47ba920d9286..8f19bdcdfde3 100644
--- a/test/Transforms/InstCombine/cast-set.ll
+++ b/test/Transforms/InstCombine/cast-set.ll
@@ -10,7 +10,7 @@ define i1 @test1(i32 %X) {
; Convert to setne int %X, 12
%c = icmp ne i32 %A, 12 ; <i1> [#uses=1]
ret i1 %c
-; CHECK-LABEL @test1(
+; CHECK-LABEL: @test1(
; CHECK: %c = icmp ne i32 %X, 12
; CHECK: ret i1 %c
}
@@ -21,7 +21,7 @@ define i1 @test2(i32 %X, i32 %Y) {
; Convert to setne int %X, %Y
%c = icmp ne i32 %A, %B ; <i1> [#uses=1]
ret i1 %c
-; CHECK-LABEL @test2(
+; CHECK-LABEL: @test2(
; CHECK: %c = icmp ne i32 %X, %Y
; CHECK: ret i1 %c
}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 7fe54ef8469b..016b6aa64558 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -187,8 +187,8 @@ define i32 @test21(i32 %X) {
%c2 = sext i8 %c1 to i32 ; <i32> [#uses=1]
%RV = and i32 %c2, 255 ; <i32> [#uses=1]
ret i32 %RV
-; CHECK: %c2.1 = and i32 %X, 255
-; CHECK: ret i32 %c2.1
+; CHECK: %c21 = and i32 %X, 255
+; CHECK: ret i32 %c21
}
define i32 @test22(i32 %X) {
@@ -722,7 +722,7 @@ define i1 @test67(i1 %a, i32 %b) {
; CHECK: ret i1 false
}
-%s = type { i32, i32, i32 }
+%s = type { i32, i32, i16 }
define %s @test68(%s *%p, i64 %i) {
; CHECK-LABEL: @test68(
@@ -1062,6 +1062,43 @@ define i8 @test85(i32 %a) {
; CHECK: [[CST:%.*]] = trunc i32 [[SHR]] to i8
}
+define i16 @test86(i16 %v) {
+ %a = sext i16 %v to i32
+ %s = ashr i32 %a, 4
+ %t = trunc i32 %s to i16
+ ret i16 %t
+
+; CHECK-LABEL: @test86(
+; CHECK: [[ASHR:%.*]] = ashr i16 %v, 4
+; CHECK-NEXT: ret i16 [[ASHR]]
+}
+
+define i16 @test87(i16 %v) {
+ %c = sext i16 %v to i32
+ %m = mul nsw i32 %c, 16
+ %a = ashr i32 %m, 16
+ %t = trunc i32 %a to i16
+ ret i16 %t
+
+; CHECK-LABEL: @test87(
+; CHECK: [[ASHR:%.*]] = ashr i16 %v, 12
+; CHECK-NEXT: ret i16 [[ASHR]]
+}
+
+define i16 @test88(i16 %v) {
+ %a = sext i16 %v to i32
+ %s = ashr i32 %a, 18
+ %t = trunc i32 %s to i16
+ ret i16 %t
+
+; Do not optimize to ashr i16 (shift by 18)
+; CHECK-LABEL: @test88(
+; CHECK: [[SEXT:%.*]] = sext i16 %v to i32
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[SEXT]], 18
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[ASHR]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+}
+
; Overflow on a float to int or int to float conversion is undefined (PR21130).
define i8 @overflow_fptosi() {
@@ -1137,3 +1174,14 @@ define i1 @PR23309v2(i32 %A, i32 %B) {
%trunc = trunc i32 %sub to i1
ret i1 %trunc
}
+
+define i16 @PR24763(i8 %V) {
+; CHECK-LABEL: @PR24763(
+; CHECK-NEXT: %[[sh:.*]] = ashr i8
+; CHECK-NEXT: %[[ext:.*]] = sext i8 %[[sh]] to i16
+; CHECK-NEXT: ret i16 %[[ext]]
+ %conv = sext i8 %V to i32
+ %l = lshr i32 %conv, 1
+ %t = trunc i32 %l to i16
+ ret i16 %t
+}
diff --git a/test/Transforms/InstCombine/compare-alloca.ll b/test/Transforms/InstCombine/compare-alloca.ll
new file mode 100644
index 000000000000..ca24da191779
--- /dev/null
+++ b/test/Transforms/InstCombine/compare-alloca.ll
@@ -0,0 +1,97 @@
+; RUN: opt -instcombine -S %s | FileCheck %s
+target datalayout = "p:32:32"
+
+
+define i1 @alloca_argument_compare(i64* %arg) {
+ %alloc = alloca i64
+ %cmp = icmp eq i64* %arg, %alloc
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare
+ ; CHECK: ret i1 false
+}
+
+define i1 @alloca_argument_compare_swapped(i64* %arg) {
+ %alloc = alloca i64
+ %cmp = icmp eq i64* %alloc, %arg
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_swapped
+ ; CHECK: ret i1 false
+}
+
+define i1 @alloca_argument_compare_ne(i64* %arg) {
+ %alloc = alloca i64
+ %cmp = icmp ne i64* %arg, %alloc
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_ne
+ ; CHECK: ret i1 true
+}
+
+define i1 @alloca_argument_compare_derived_ptrs(i64* %arg, i64 %x) {
+ %alloc = alloca i64, i64 8
+ %p = getelementptr i64, i64* %arg, i64 %x
+ %q = getelementptr i64, i64* %alloc, i64 3
+ %cmp = icmp eq i64* %p, %q
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_derived_ptrs
+ ; CHECK: ret i1 false
+}
+
+declare void @escape(i64*)
+define i1 @alloca_argument_compare_escaped_alloca(i64* %arg) {
+ %alloc = alloca i64
+ call void @escape(i64* %alloc)
+ %cmp = icmp eq i64* %alloc, %arg
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_escaped_alloca
+ ; CHECK: %cmp = icmp eq i64* %alloc, %arg
+ ; CHECK: ret i1 %cmp
+}
+
+declare void @check_compares(i1, i1)
+define void @alloca_argument_compare_two_compares(i64* %p) {
+ %q = alloca i64, i64 8
+ %r = getelementptr i64, i64* %p, i64 1
+ %s = getelementptr i64, i64* %q, i64 2
+ %cmp1 = icmp eq i64* %p, %q
+ %cmp2 = icmp eq i64* %r, %s
+ call void @check_compares(i1 %cmp1, i1 %cmp2)
+ ret void
+ ; We will only fold if there is a single cmp.
+ ; CHECK-LABEL: alloca_argument_compare_two_compares
+ ; CHECK: call void @check_compares(i1 %cmp1, i1 %cmp2)
+}
+
+define i1 @alloca_argument_compare_escaped_through_store(i64* %arg, i64** %ptr) {
+ %alloc = alloca i64
+ %cmp = icmp eq i64* %alloc, %arg
+ %p = getelementptr i64, i64* %alloc, i64 1
+ store i64* %p, i64** %ptr
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_escaped_through_store
+ ; CHECK: %cmp = icmp eq i64* %alloc, %arg
+ ; CHECK: ret i1 %cmp
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+define i1 @alloca_argument_compare_benign_instrs(i8* %arg) {
+ %alloc = alloca i8
+ call void @llvm.lifetime.start(i64 1, i8* %alloc)
+ %cmp = icmp eq i8* %arg, %alloc
+ %x = load i8, i8* %arg
+ store i8 %x, i8* %alloc
+ call void @llvm.lifetime.end(i64 1, i8* %alloc)
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_argument_compare_benign_instrs
+ ; CHECK: ret i1 false
+}
+
+declare i64* @allocator()
+define i1 @alloca_call_compare() {
+ %p = alloca i64
+ %q = call i64* @allocator()
+ %cmp = icmp eq i64* %p, %q
+ ret i1 %cmp
+ ; CHECK-LABEL: alloca_call_compare
+ ; CHECK: ret i1 false
+}
diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll
index 62cd5b3f94d5..0ed0ac7d8d9c 100644
--- a/test/Transforms/InstCombine/compare-signs.ll
+++ b/test/Transforms/InstCombine/compare-signs.ll
@@ -56,3 +56,43 @@ entry:
; CHECK-NOT: zext
; CHECK: ret i32 %2
}
+
+define i1 @test4a(i32 %a) {
+; CHECK-LABEL: @test4a(
+ entry:
+; CHECK: %c = icmp slt i32 %a, 1
+; CHECK-NEXT: ret i1 %c
+ %l = ashr i32 %a, 31
+ %na = sub i32 0, %a
+ %r = lshr i32 %na, 31
+ %signum = or i32 %l, %r
+ %c = icmp slt i32 %signum, 1
+ ret i1 %c
+}
+
+define i1 @test4b(i64 %a) {
+; CHECK-LABEL: @test4b(
+ entry:
+; CHECK: %c = icmp slt i64 %a, 1
+; CHECK-NEXT: ret i1 %c
+ %l = ashr i64 %a, 63
+ %na = sub i64 0, %a
+ %r = lshr i64 %na, 63
+ %signum = or i64 %l, %r
+ %c = icmp slt i64 %signum, 1
+ ret i1 %c
+}
+
+define i1 @test4c(i64 %a) {
+; CHECK-LABEL: @test4c(
+ entry:
+; CHECK: %c = icmp slt i64 %a, 1
+; CHECK-NEXT: ret i1 %c
+ %l = ashr i64 %a, 63
+ %na = sub i64 0, %a
+ %r = lshr i64 %na, 63
+ %signum = or i64 %l, %r
+ %signum.trunc = trunc i64 %signum to i32
+ %c = icmp slt i32 %signum.trunc, 1
+ ret i1 %c
+}
diff --git a/test/Transforms/InstCombine/constant-fold-alias.ll b/test/Transforms/InstCombine/constant-fold-alias.ll
index c872f57c37e1..810687255f61 100644
--- a/test/Transforms/InstCombine/constant-fold-alias.ll
+++ b/test/Transforms/InstCombine/constant-fold-alias.ll
@@ -6,8 +6,8 @@ target datalayout = "e-p1:16:16-p2:32:32-p3:64:64"
@G2 = global i32 42
@G3 = global [4 x i8] zeroinitializer, align 1
-@A1 = alias bitcast (i8* getelementptr inbounds ([4 x i8], [4 x i8]* @G3, i32 0, i32 2) to i32*)
-@A2 = alias inttoptr (i64 and (i64 ptrtoint (i8* getelementptr inbounds ([4 x i8], [4 x i8]* @G3, i32 0, i32 3) to i64), i64 -4) to i32*)
+@A1 = alias i32, bitcast (i8* getelementptr inbounds ([4 x i8], [4 x i8]* @G3, i32 0, i32 2) to i32*)
+@A2 = alias i32, inttoptr (i64 and (i64 ptrtoint (i8* getelementptr inbounds ([4 x i8], [4 x i8]* @G3, i32 0, i32 3) to i64), i64 -4) to i32*)
define i64 @f1() {
; This cannot be constant folded because G1 is underaligned.
diff --git a/test/Transforms/InstCombine/ctpop.ll b/test/Transforms/InstCombine/ctpop.ll
new file mode 100644
index 000000000000..38612c92aaa4
--- /dev/null
+++ b/test/Transforms/InstCombine/ctpop.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+
+declare i32 @llvm.ctpop.i32(i32)
+declare i8 @llvm.ctpop.i8(i8)
+declare void @llvm.assume(i1)
+
+define i1 @test1(i32 %arg) {
+; CHECK: @test1
+; CHECK: ret i1 false
+ %and = and i32 %arg, 15
+ %cnt = call i32 @llvm.ctpop.i32(i32 %and)
+ %res = icmp eq i32 %cnt, 9
+ ret i1 %res
+}
+
+define i1 @test2(i32 %arg) {
+; CHECK: @test2
+; CHECK: ret i1 false
+ %and = and i32 %arg, 1
+ %cnt = call i32 @llvm.ctpop.i32(i32 %and)
+ %res = icmp eq i32 %cnt, 2
+ ret i1 %res
+}
+
+define i1 @test3(i32 %arg) {
+; CHECK: @test3
+; CHECK: ret i1 false
+ ;; Use an assume to make all the bits known without triggering constant
+ ;; folding. This is trying to hit a corner case where we have to avoid
+ ;; taking the log of 0.
+ %assume = icmp eq i32 %arg, 0
+ call void @llvm.assume(i1 %assume)
+ %cnt = call i32 @llvm.ctpop.i32(i32 %arg)
+ %res = icmp eq i32 %cnt, 2
+ ret i1 %res
+}
+
+; Negative test for when we know nothing
+define i1 @test4(i8 %arg) {
+; CHECK: @test4
+; CHECK: ret i1 %res
+ %cnt = call i8 @llvm.ctpop.i8(i8 %arg)
+ %res = icmp eq i8 %cnt, 2
+ ret i1 %res
+}
diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index 823ec98ebe2b..4b1db9db353b 100644
--- a/test/Transforms/InstCombine/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -3,7 +3,7 @@
@.str = private constant [3 x i8] c"%c\00"
-define void @foo() nounwind ssp {
+define void @foo() nounwind ssp !dbg !0 {
;CHECK: call i32 @putchar{{.+}} !dbg
%1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 97), !dbg !5
ret void, !dbg !7
@@ -15,9 +15,9 @@ declare i32 @printf(i8*, ...)
!llvm.module.flags = !{!10}
!llvm.dbg.sp = !{!0}
-!0 = !DISubprogram(name: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !1, type: !3, function: void ()* @foo)
+!0 = distinct !DISubprogram(name: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !1, type: !3)
!1 = !DIFile(filename: "m.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !8, enums: !{}, retainedTypes: !{}, subprograms: !9)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !8, enums: !{}, retainedTypes: !{}, subprograms: !9)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !DILocation(line: 5, column: 2, scope: !6)
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index 3875bcc9b8c6..9c8b2a8e4154 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -6,7 +6,7 @@ declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone
declare i8* @foo(i8*, i32, i64, i64) nounwind
-define hidden i8* @foobar(i8* %__dest, i32 %__val, i64 %__len) nounwind inlinehint ssp {
+define hidden i8* @foobar(i8* %__dest, i32 %__val, i64 %__len) nounwind inlinehint ssp !dbg !1 {
entry:
%__dest.addr = alloca i8*, align 8
%__val.addr = alloca i32, align 4
@@ -31,16 +31,16 @@ entry:
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!30}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "__dest", line: 78, arg: 1, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "foobar", line: 79, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 79, file: !27, scope: !2, type: !4, function: i8* (i8*, i32, i64)* @foobar, variables: !25)
+!0 = !DILocalVariable(name: "__dest", line: 78, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "foobar", line: 79, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 79, file: !27, scope: !2, type: !4, variables: !25)
!2 = !DIFile(filename: "string.h", directory: "Game")
-!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 127710)", isOptimized: true, emissionKind: 0, file: !28, enums: !29, retainedTypes: !29, subprograms: !24)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 127710)", isOptimized: true, emissionKind: 0, file: !28, enums: !29, retainedTypes: !29, subprograms: !24)
!4 = !DISubroutineType(types: !5)
!5 = !{!6}
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !3, baseType: null)
-!7 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "__val", line: 78, arg: 2, scope: !1, file: !2, type: !8)
+!7 = !DILocalVariable(name: "__val", line: 78, arg: 2, scope: !1, file: !2, type: !8)
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "__len", line: 78, arg: 3, scope: !1, file: !2, type: !10)
+!9 = !DILocalVariable(name: "__len", line: 78, arg: 3, scope: !1, file: !2, type: !10)
!10 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", line: 80, file: !27, scope: !3, baseType: !11)
!11 = !DIDerivedType(tag: DW_TAG_typedef, name: "__darwin_size_t", line: 90, file: !27, scope: !3, baseType: !12)
!12 = !DIBasicType(tag: DW_TAG_base_type, name: "long unsigned int", size: 64, align: 64, encoding: DW_ATE_unsigned)
diff --git a/test/Transforms/InstCombine/demorgan-zext.ll b/test/Transforms/InstCombine/demorgan-zext.ll
new file mode 100644
index 000000000000..da41fac3e350
--- /dev/null
+++ b/test/Transforms/InstCombine/demorgan-zext.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; PR22723: Recognize De Morgan's Laws when obfuscated by zexts.
+
+define i32 @demorgan_or(i1 %X, i1 %Y) {
+ %zextX = zext i1 %X to i32
+ %zextY = zext i1 %Y to i32
+ %notX = xor i32 %zextX, 1
+ %notY = xor i32 %zextY, 1
+ %or = or i32 %notX, %notY
+ ret i32 %or
+
+; CHECK-LABEL: demorgan_or(
+; CHECK-NEXT: %[[AND:.*]] = and i1 %X, %Y
+; CHECK-NEXT: %[[ZEXT:.*]] = zext i1 %[[AND]] to i32
+; CHECK-NEXT: %[[XOR:.*]] = xor i32 %[[ZEXT]], 1
+; CHECK-NEXT: ret i32 %[[XOR]]
+}
+
+define i32 @demorgan_and(i1 %X, i1 %Y) {
+ %zextX = zext i1 %X to i32
+ %zextY = zext i1 %Y to i32
+ %notX = xor i32 %zextX, 1
+ %notY = xor i32 %zextY, 1
+ %and = and i32 %notX, %notY
+ ret i32 %and
+
+; CHECK-LABEL: demorgan_and(
+; CHECK-NEXT: %[[OR:.*]] = or i1 %X, %Y
+; CHECK-NEXT: %[[ZEXT:.*]] = zext i1 %[[OR]] to i32
+; CHECK-NEXT: %[[XOR:.*]] = xor i32 %[[ZEXT]], 1
+; CHECK-NEXT: ret i32 %[[XOR]]
+}
+
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index e0ff07baae7c..27a316113e52 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -163,7 +163,7 @@ define i32 @test19(i32 %x) {
; CHECK-LABEL: @test19(
; CHECK-NEXT: icmp eq i32 %x, 1
; CHECK-NEXT: zext i1 %{{.*}} to i32
-; CHECK-NEXT ret i32
+; CHECK-NEXT: ret i32
}
define i32 @test20(i32 %x) {
@@ -270,9 +270,7 @@ define <2 x i32> @test31(<2 x i32> %x) {
%div = udiv <2 x i32> %shr, <i32 2147483647, i32 2147483647>
ret <2 x i32> %div
; CHECK-LABEL: @test31(
-; CHECK-NEXT: %[[shr:.*]] = lshr <2 x i32> %x, <i32 31, i32 31>
-; CHECK-NEXT: udiv <2 x i32> %[[shr]], <i32 2147483647, i32 2147483647>
-; CHECK-NEXT: ret <2 x i32>
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
}
define i32 @test32(i32 %a, i32 %b) {
@@ -325,3 +323,21 @@ define i32 @test36(i32 %A) {
; CHECK-NEXT: %[[shr:.*]] = lshr exact i32 %[[and]], %A
; CHECK-NEXT: ret i32 %[[shr]]
}
+
+define i32 @test37(i32* %b) {
+entry:
+ store i32 0, i32* %b, align 4
+ %0 = load i32, i32* %b, align 4
+ br i1 undef, label %lor.rhs, label %lor.end
+
+lor.rhs: ; preds = %entry
+ %mul = mul nsw i32 undef, %0
+ br label %lor.end
+
+lor.end: ; preds = %lor.rhs, %entry
+ %t.0 = phi i32 [ %0, %entry ], [ %mul, %lor.rhs ]
+ %div = sdiv i32 %t.0, 2
+ ret i32 %div
+; CHECK-LABEL: @test37(
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/InstCombine/exp2-1.ll b/test/Transforms/InstCombine/exp2-1.ll
index 8e6a0e0d93f6..b6a56b9a9a7e 100644
--- a/test/Transforms/InstCombine/exp2-1.ll
+++ b/test/Transforms/InstCombine/exp2-1.ll
@@ -1,7 +1,8 @@
; Test that the exp2 library call simplifier works correctly.
;
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -instcombine -S -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=CHECK-WIN
+; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=CHECK -check-prefix=INTRINSIC -check-prefix=LDEXP -check-prefix=LDEXPF
+; RUN: opt < %s -instcombine -S -mtriple=i386-pc-win32 | FileCheck %s -check-prefix=INTRINSIC -check-prefix=LDEXP -check-prefix=NOLDEXPF
+; RUN: opt < %s -instcombine -S -mtriple=amdgcn-unknown-unknown | FileCheck %s -check-prefix=INTRINSIC -check-prefix=NOLDEXP -check-prefix=NOLDEXPF
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
@@ -80,21 +81,19 @@ declare double @llvm.exp2.f64(double)
declare float @llvm.exp2.f32(float)
define double @test_simplify9(i8 zeroext %x) {
-; CHECK-LABEL: @test_simplify9(
-; CHECK-WIN-LABEL: @test_simplify9(
+; INTRINSIC-LABEL: @test_simplify9(
%conv = uitofp i8 %x to double
%ret = call double @llvm.exp2.f64(double %conv)
-; CHECK: call double @ldexp
-; CHECK-WIN: call double @ldexp
+; LDEXP: call double @ldexp
+; NOLDEXP-NOT: call double @ldexp
ret double %ret
}
define float @test_simplify10(i8 zeroext %x) {
-; CHECK-LABEL: @test_simplify10(
-; CHECK-WIN-LABEL: @test_simplify10(
+; INTRINSIC-LABEL: @test_simplify10(
%conv = uitofp i8 %x to float
%ret = call float @llvm.exp2.f32(float %conv)
-; CHECK: call float @ldexpf
-; CHECK-WIN-NOT: call float @ldexpf
+; LDEXPF: call float @ldexpf
+; NOLDEXPF-NOT: call float @ldexpf
ret float %ret
}
diff --git a/test/Transforms/InstCombine/extractvalue.ll b/test/Transforms/InstCombine/extractvalue.ll
index 6319590873a2..9c293581a069 100644
--- a/test/Transforms/InstCombine/extractvalue.ll
+++ b/test/Transforms/InstCombine/extractvalue.ll
@@ -48,16 +48,16 @@ define i32 @foo(i32 %a, i32 %b) {
; CHECK: call {{.*}}(i32 [[LOAD]])
; CHECK-NOT: extractvalue
; CHECK: ret i32 [[LOAD]]
-define i32 @extract2gep({i32, i32}* %pair, i32* %P) {
+define i32 @extract2gep({i16, i32}* %pair, i32* %P) {
; The load + extractvalue should be converted
; to an inbounds gep + smaller load.
; The new load should be in the same spot as the old load.
- %L = load {i32, i32}, {i32, i32}* %pair
+ %L = load {i16, i32}, {i16, i32}* %pair
store i32 0, i32* %P
br label %loop
loop:
- %E = extractvalue {i32, i32} %L, 1
+ %E = extractvalue {i16, i32} %L, 1
%C = call i32 @baz(i32 %E)
store i32 %C, i32* %P
%cond = icmp eq i32 %C, 0
@@ -67,17 +67,17 @@ end:
ret i32 %E
}
-; CHECK-LABEL: define i32 @doubleextract2gep(
+; CHECK-LABEL: define i16 @doubleextract2gep(
; CHECK-NEXT: [[GEP:%[a-z0-9]+]] = getelementptr inbounds {{.*}}, {{.*}}* %arg, i64 0, i32 1, i32 1
-; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32, i32* [[GEP]]
-; CHECK-NEXT: ret i32 [[LOAD]]
-define i32 @doubleextract2gep({i32, {i32, i32}}* %arg) {
+; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i16, i16* [[GEP]]
+; CHECK-NEXT: ret i16 [[LOAD]]
+define i16 @doubleextract2gep({i16, {i32, i16}}* %arg) {
; The load + extractvalues should be converted
; to a 3-index inbounds gep + smaller load.
- %L = load {i32, {i32, i32}}, {i32, {i32, i32}}* %arg
- %E1 = extractvalue {i32, {i32, i32}} %L, 1
- %E2 = extractvalue {i32, i32} %E1, 1
- ret i32 %E2
+ %L = load {i16, {i32, i16}}, {i16, {i32, i16}}* %arg
+ %E1 = extractvalue {i16, {i32, i16}} %L, 1
+ %E2 = extractvalue {i32, i16} %E1, 1
+ ret i16 %E2
}
; CHECK: define i32 @nogep-multiuse
diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll
index 0479549bea3f..941270df0e97 100644
--- a/test/Transforms/InstCombine/fabs.ll
+++ b/test/Transforms/InstCombine/fabs.ll
@@ -41,6 +41,7 @@ define fp128 @square_fabs_call_f128(fp128 %x) {
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare fp128 @llvm.fabs.f128(fp128)
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
define float @square_fabs_intrinsic_f32(float %x) {
%mul = fmul float %x, %x
@@ -98,3 +99,27 @@ define float @square_fabs_shrink_call2(float %x) {
; CHECK-NEXT: ret float %sq
}
+; A scalar fabs op makes the sign bit zero, so masking off all of the other bits means we can return zero.
+
+define i32 @fabs_value_tracking_f32(float %x) {
+ %call = call float @llvm.fabs.f32(float %x)
+ %bc = bitcast float %call to i32
+ %and = and i32 %bc, 2147483648
+ ret i32 %and
+
+; CHECK-LABEL: fabs_value_tracking_f32(
+; CHECK: ret i32 0
+}
+
+; TODO: A vector fabs op makes the sign bits zero, so masking off all of the other bits means we can return zero.
+
+define <4 x i32> @fabs_value_tracking_v4f32(<4 x float> %x) {
+ %call = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
+ %bc = bitcast <4 x float> %call to <4 x i32>
+ %and = and <4 x i32> %bc, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
+ ret <4 x i32> %and
+
+; CHECK-LABEL: fabs_value_tracking_v4f32(
+; CHECK: ret <4 x i32> %and
+}
+
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index 4eebdbdfacf1..fd563481b3ed 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -570,7 +570,7 @@ define double @sqrt_intrinsic_arg_squared(double %x) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_arg_squared(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}
@@ -584,8 +584,8 @@ define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args1(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -597,8 +597,8 @@ define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args2(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -610,8 +610,8 @@ define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args3(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -623,8 +623,8 @@ define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args4(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -636,8 +636,8 @@ define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args5(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -649,8 +649,8 @@ define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args6(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -675,7 +675,7 @@ define double @sqrt_intrinsic_arg_5th(double %x) #0 {
; CHECK-LABEL: sqrt_intrinsic_arg_5th(
; CHECK-NEXT: %mul = fmul fast double %x, %x
-; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
; CHECK-NEXT: ret double %1
}
@@ -692,7 +692,7 @@ define float @sqrt_call_squared_f32(float %x) #0 {
ret float %sqrt
; CHECK-LABEL: sqrt_call_squared_f32(
-; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: %fabs = call fast float @llvm.fabs.f32(float %x)
; CHECK-NEXT: ret float %fabs
}
@@ -702,7 +702,7 @@ define double @sqrt_call_squared_f64(double %x) #0 {
ret double %sqrt
; CHECK-LABEL: sqrt_call_squared_f64(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}
@@ -712,7 +712,114 @@ define fp128 @sqrt_call_squared_f128(fp128 %x) #0 {
ret fp128 %sqrt
; CHECK-LABEL: sqrt_call_squared_f128(
-; CHECK-NEXT: %fabs = call fp128 @llvm.fabs.f128(fp128 %x)
+; CHECK-NEXT: %fabs = call fast fp128 @llvm.fabs.f128(fp128 %x)
; CHECK-NEXT: ret fp128 %fabs
}
+; =========================================================================
+;
+; Test-cases for fmin / fmax
+;
+; =========================================================================
+
+declare double @fmax(double, double)
+declare double @fmin(double, double)
+declare float @fmaxf(float, float)
+declare float @fminf(float, float)
+declare fp128 @fmaxl(fp128, fp128)
+declare fp128 @fminl(fp128, fp128)
+
+; No NaNs is the minimum requirement to replace these calls.
+; This should always be set when unsafe-fp-math is true, but
+; alternate the attributes for additional test coverage.
+; 'nsz' is implied by the definition of fmax or fmin itself.
+attributes #1 = { "no-nans-fp-math" = "true" }
+
+; Shrink and remove the call.
+define float @max1(float %a, float %b) #0 {
+ %c = fpext float %a to double
+ %d = fpext float %b to double
+ %e = call double @fmax(double %c, double %d)
+ %f = fptrunc double %e to float
+ ret float %f
+
+; CHECK-LABEL: max1(
+; CHECK-NEXT: fcmp fast ogt float %a, %b
+; CHECK-NEXT: select {{.*}} float %a, float %b
+; CHECK-NEXT: ret
+}
+
+define float @max2(float %a, float %b) #1 {
+ %c = call float @fmaxf(float %a, float %b)
+ ret float %c
+
+; CHECK-LABEL: max2(
+; CHECK-NEXT: fcmp nnan nsz ogt float %a, %b
+; CHECK-NEXT: select {{.*}} float %a, float %b
+; CHECK-NEXT: ret
+}
+
+
+define double @max3(double %a, double %b) #0 {
+ %c = call double @fmax(double %a, double %b)
+ ret double %c
+
+; CHECK-LABEL: max3(
+; CHECK-NEXT: fcmp fast ogt double %a, %b
+; CHECK-NEXT: select {{.*}} double %a, double %b
+; CHECK-NEXT: ret
+}
+
+define fp128 @max4(fp128 %a, fp128 %b) #1 {
+ %c = call fp128 @fmaxl(fp128 %a, fp128 %b)
+ ret fp128 %c
+
+; CHECK-LABEL: max4(
+; CHECK-NEXT: fcmp nnan nsz ogt fp128 %a, %b
+; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
+; CHECK-NEXT: ret
+}
+
+; Shrink and remove the call.
+define float @min1(float %a, float %b) #1 {
+ %c = fpext float %a to double
+ %d = fpext float %b to double
+ %e = call double @fmin(double %c, double %d)
+ %f = fptrunc double %e to float
+ ret float %f
+
+; CHECK-LABEL: min1(
+; CHECK-NEXT: fcmp nnan nsz olt float %a, %b
+; CHECK-NEXT: select {{.*}} float %a, float %b
+; CHECK-NEXT: ret
+}
+
+define float @min2(float %a, float %b) #0 {
+ %c = call float @fminf(float %a, float %b)
+ ret float %c
+
+; CHECK-LABEL: min2(
+; CHECK-NEXT: fcmp fast olt float %a, %b
+; CHECK-NEXT: select {{.*}} float %a, float %b
+; CHECK-NEXT: ret
+}
+
+define double @min3(double %a, double %b) #1 {
+ %c = call double @fmin(double %a, double %b)
+ ret double %c
+
+; CHECK-LABEL: min3(
+; CHECK-NEXT: fcmp nnan nsz olt double %a, %b
+; CHECK-NEXT: select {{.*}} double %a, double %b
+; CHECK-NEXT: ret
+}
+
+define fp128 @min4(fp128 %a, fp128 %b) #0 {
+ %c = call fp128 @fminl(fp128 %a, fp128 %b)
+ ret fp128 %c
+
+; CHECK-LABEL: min4(
+; CHECK-NEXT: fcmp fast olt fp128 %a, %b
+; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstCombine/ffs-1.ll b/test/Transforms/InstCombine/ffs-1.ll
index c8763dc199a9..d27fb5d89f09 100644
--- a/test/Transforms/InstCombine/ffs-1.ll
+++ b/test/Transforms/InstCombine/ffs-1.ll
@@ -1,9 +1,12 @@
; Test that the ffs* library call simplifier works correctly.
;
; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=CHECK-LINUX
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+; RUN: opt < %s -mtriple i386-pc-linux -instcombine -S | FileCheck %s -check-prefix=CHECK-FFS
+; RUN: opt -instcombine -mtriple=arm64-apple-ios9.0 -S %s | FileCheck --check-prefix=CHECK-FFS %s
+; RUN: opt -instcombine -mtriple=arm64-apple-tvos9.0 -S %s | FileCheck --check-prefix=CHECK-FFS %s
+; RUN: opt -instcombine -mtriple=thumbv7k-apple-watchos2.0 -S %s | FileCheck --check-prefix=CHECK-FFS %s
+; RUN: opt -instcombine -mtriple=x86_64-apple-macosx10.11 -S %s | FileCheck --check-prefix=CHECK-FFS %s
+; RUN: opt -instcombine -mtriple=x86_64-freebsd-gnu -S %s | FileCheck --check-prefix=CHECK-FFS %s
declare i32 @ffs(i32)
declare i32 @ffsl(i32)
@@ -19,17 +22,17 @@ define i32 @test_simplify1() {
}
define i32 @test_simplify2() {
-; CHECK-LINUX-LABEL: @test_simplify2(
+; CHECK-FFS-LABEL: @test_simplify2(
%ret = call i32 @ffsl(i32 0)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 0
+; CHECK-FFS-NEXT: ret i32 0
}
define i32 @test_simplify3() {
-; CHECK-LINUX-LABEL: @test_simplify3(
+; CHECK-FFS-LABEL: @test_simplify3(
%ret = call i32 @ffsll(i64 0)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 0
+; CHECK-FFS-NEXT: ret i32 0
}
; Check ffs(c) -> cttz(c) + 1, where 'c' is a constant.
@@ -56,45 +59,45 @@ define i32 @test_simplify6() {
}
define i32 @test_simplify7() {
-; CHECK-LINUX-LABEL: @test_simplify7(
+; CHECK-FFS-LABEL: @test_simplify7(
%ret = call i32 @ffsl(i32 65536)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 17
+; CHECK-FFS-NEXT: ret i32 17
}
define i32 @test_simplify8() {
-; CHECK-LINUX-LABEL: @test_simplify8(
+; CHECK-FFS-LABEL: @test_simplify8(
%ret = call i32 @ffsll(i64 1024)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 11
+; CHECK-FFS-NEXT: ret i32 11
}
define i32 @test_simplify9() {
-; CHECK-LINUX-LABEL: @test_simplify9(
+; CHECK-FFS-LABEL: @test_simplify9(
%ret = call i32 @ffsll(i64 65536)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 17
+; CHECK-FFS-NEXT: ret i32 17
}
define i32 @test_simplify10() {
-; CHECK-LINUX-LABEL: @test_simplify10(
+; CHECK-FFS-LABEL: @test_simplify10(
%ret = call i32 @ffsll(i64 17179869184)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 35
+; CHECK-FFS-NEXT: ret i32 35
}
define i32 @test_simplify11() {
-; CHECK-LINUX-LABEL: @test_simplify11(
+; CHECK-FFS-LABEL: @test_simplify11(
%ret = call i32 @ffsll(i64 281474976710656)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 49
+; CHECK-FFS-NEXT: ret i32 49
}
define i32 @test_simplify12() {
-; CHECK-LINUX-LABEL: @test_simplify12(
+; CHECK-FFS-LABEL: @test_simplify12(
%ret = call i32 @ffsll(i64 1152921504606846976)
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 61
+; CHECK-FFS-NEXT: ret i32 61
}
; Check ffs(x) -> x != 0 ? (i32)llvm.cttz(x) + 1 : 0.
@@ -102,7 +105,7 @@ define i32 @test_simplify12() {
define i32 @test_simplify13(i32 %x) {
; CHECK-LABEL: @test_simplify13(
%ret = call i32 @ffs(i32 %x)
-; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; CHECK-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
; CHECK-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
; CHECK-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
@@ -111,24 +114,24 @@ define i32 @test_simplify13(i32 %x) {
}
define i32 @test_simplify14(i32 %x) {
-; CHECK-LINUX-LABEL: @test_simplify14(
+; CHECK-FFS-LABEL: @test_simplify14(
%ret = call i32 @ffsl(i32 %x)
-; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 false)
-; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
-; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
-; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
+; CHECK-FFS-NEXT: [[CTTZ:%[a-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; CHECK-FFS-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i32 [[CTTZ]], 1
+; CHECK-FFS-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %x, 0
+; CHECK-FFS-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[INC]], i32 0
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 [[RET]]
+; CHECK-FFS-NEXT: ret i32 [[RET]]
}
define i32 @test_simplify15(i64 %x) {
-; CHECK-LINUX-LABEL: @test_simplify15(
+; CHECK-FFS-LABEL: @test_simplify15(
%ret = call i32 @ffsll(i64 %x)
-; CHECK-LINUX-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 false)
-; CHECK-LINUX-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i64 [[CTTZ]], 1
-; CHECK-LINUX-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
-; CHECK-LINUX-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0
-; CHECK-LINUX-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[TRUNC]], i32 0
+; CHECK-FFS-NEXT: [[CTTZ:%[a-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; CHECK-FFS-NEXT: [[INC:%[a-z0-9]+]] = add nuw nsw i64 [[CTTZ]], 1
+; CHECK-FFS-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[INC]] to i32
+; CHECK-FFS-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i64 %x, 0
+; CHECK-FFS-NEXT: [[RET:%[a-z0-9]+]] = select i1 [[CMP]], i32 [[TRUNC]], i32 0
ret i32 %ret
-; CHECK-LINUX-NEXT: ret i32 [[RET]]
+; CHECK-FFS-NEXT: ret i32 [[RET]]
}
diff --git a/test/Transforms/InstCombine/fold-phi-load-metadata.ll b/test/Transforms/InstCombine/fold-phi-load-metadata.ll
new file mode 100644
index 000000000000..7fa26b46e25d
--- /dev/null
+++ b/test/Transforms/InstCombine/fold-phi-load-metadata.ll
@@ -0,0 +1,69 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+@g1 = common global i32* null, align 8
+
+%struct.S1 = type { i32, float }
+%struct.S2 = type { float, i32 }
+
+; Check that instcombine preserves metadata when it merges two loads.
+;
+; CHECK: return:
+; CHECK: load i32*, i32** %{{[a-z0-9.]+}}, align 8, !nonnull ![[EMPTYNODE:[0-9]+]]
+; CHECK: load i32, i32* %{{[a-z0-9.]+}}, align 4, !tbaa ![[TBAA:[0-9]+]], !range ![[RANGE:[0-9]+]], !invariant.load ![[EMPTYNODE]], !alias.scope ![[ALIAS_SCOPE:[0-9]+]], !noalias ![[NOALIAS:[0-9]+]]
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @phi_load_metadata(%struct.S1* %s1, %struct.S2* %s2, i32 %c, i32** %x0, i32 **%x1) #0 {
+entry:
+ %tobool = icmp eq i32 %c, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %i = getelementptr inbounds %struct.S2, %struct.S2* %s2, i64 0, i32 1
+ %val = load i32, i32* %i, align 4, !tbaa !0, !alias.scope !13, !noalias !14, !invariant.load !17, !range !18
+ %p0 = load i32*, i32** %x0, align 8, !nonnull !17
+ br label %return
+
+if.end: ; preds = %entry
+ %i2 = getelementptr inbounds %struct.S1, %struct.S1* %s1, i64 0, i32 0
+ %val2 = load i32, i32* %i2, align 4, !tbaa !2, !alias.scope !15, !noalias !16, !invariant.load !17, !range !19
+ %p1 = load i32*, i32** %x1, align 8, !nonnull !17
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %retval = phi i32 [ %val, %if.then ], [ %val2, %if.end ]
+ %pval = phi i32* [ %p0, %if.then ], [ %p1, %if.end ]
+ store i32* %pval, i32** @g1, align 8
+ ret i32 %retval
+}
+
+; CHECK: ![[EMPTYNODE]] = !{}
+; CHECK: ![[TBAA]] = !{![[TAG1:[0-9]+]], ![[TAG1]], i64 0}
+; CHECK: ![[TAG1]] = !{!"int", !{{[0-9]+}}, i64 0}
+; CHECK: ![[RANGE]] = !{i32 10, i32 25}
+; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE1:[0-9]+]], ![[SCOPE2:[0-9]+]]}
+; CHECK: ![[SCOPE0]] = distinct !{![[SCOPE0]], !{{[0-9]+}}, !"scope0"}
+; CHECK: ![[SCOPE1]] = distinct !{![[SCOPE1]], !{{[0-9]+}}, !"scope1"}
+; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"}
+; CHECK: ![[NOALIAS]] = !{![[SCOPE3:[0-9]+]]}
+; CHECK: ![[SCOPE3]] = distinct !{![[SCOPE3]], !{{[0-9]+}}, !"scope3"}
+
+!0 = !{!1, !4, i64 4}
+!1 = !{!"", !7, i64 0, !4, i64 4}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"", !4, i64 0, !7, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"float", !5, i64 0}
+!8 = !{!8, !"some domain"}
+!9 = !{!9, !8, !"scope0"}
+!10 = !{!10, !8, !"scope1"}
+!11 = !{!11, !8, !"scope2"}
+!12 = !{!12, !8, !"scope3"}
+!13 = !{!9, !10}
+!14 = !{!11, !12}
+!15 = !{!9, !11}
+!16 = !{!10, !12}
+!17 = !{}
+!18 = !{i32 10, i32 20}
+!19 = !{i32 15, i32 25}
diff --git a/test/Transforms/InstCombine/gc.relocate.ll b/test/Transforms/InstCombine/gc.relocate.ll
index a51aac10eb57..308258a19417 100644
--- a/test/Transforms/InstCombine/gc.relocate.ll
+++ b/test/Transforms/InstCombine/gc.relocate.ll
@@ -6,8 +6,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
; then the return attribute of gc.relocate is dereferenceable(N).
declare zeroext i1 @return_i1()
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32)
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
define i32 addrspace(1)* @deref(i32 addrspace(1)* dereferenceable(8) %dparam) gc "statepoint-example" {
; Checks that a dereferenceabler pointer
@@ -15,7 +15,38 @@ define i32 addrspace(1)* @deref(i32 addrspace(1)* dereferenceable(8) %dparam) gc
; CHECK: call dereferenceable(8)
entry:
%load = load i32, i32 addrspace(1)* %dparam
- %tok = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
- %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %tok, i32 7, i32 7)
+ %tok = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
+ %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7)
ret i32 addrspace(1)* %relocate
}
+
+define i32 @explicit_nonnull(i32 addrspace(1)* nonnull %dparam) gc "statepoint-example" {
+; Checks that relocating an explicitly nonnull pointer yields a nonnull result.
+; CHECK-LABEL: @explicit_nonnull
+; CHECK: ret i32 1
+entry:
+ %load = load i32, i32 addrspace(1)* %dparam
+ %tok = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
+ %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7)
+ %cmp = icmp eq i32 addrspace(1)* %relocate, null
+ %ret_val = select i1 %cmp, i32 0, i32 1
+ ret i32 %ret_val
+}
+
+define i32 @implicit_nonnull(i32 addrspace(1)* %dparam) gc "statepoint-example" {
+; Checks that relocating an implicitly nonnull pointer (null-checked in entry) yields a nonnull result.
+; CHECK-LABEL: @implicit_nonnull
+; CHECK: ret i32 1
+entry:
+ %cond = icmp eq i32 addrspace(1)* %dparam, null
+ br i1 %cond, label %no_gc, label %gc
+gc:
+ %load = load i32, i32 addrspace(1)* %dparam
+ %tok = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
+ %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7)
+ %cmp = icmp eq i32 addrspace(1)* %relocate, null
+ %ret_val = select i1 %cmp, i32 0, i32 1
+ ret i32 %ret_val
+no_gc:
+ unreachable
+}
diff --git a/test/Transforms/InstCombine/gepphigep.ll b/test/Transforms/InstCombine/gepphigep.ll
index b98ea4cd1159..cc90d714be73 100644
--- a/test/Transforms/InstCombine/gepphigep.ll
+++ b/test/Transforms/InstCombine/gepphigep.ll
@@ -134,3 +134,53 @@ exit:
; CHECK: getelementptr{{.*}}i64 1
; CHECK: exit:
}
+
+@.str.4 = external unnamed_addr constant [100 x i8], align 1
+
+; Instcombine shouldn't add new PHI nodes while folding GEPs if that will leave
+; old PHI nodes behind as this is not clearly beneficial.
+; CHECK-LABEL: @test5(
+define void @test5(i16 *%idx, i8 **%in) #0 {
+entry:
+ %0 = load i8*, i8** %in
+ %incdec.ptr = getelementptr inbounds i8, i8* %0, i32 1
+ %1 = load i8, i8* %incdec.ptr, align 1
+ %cmp23 = icmp eq i8 %1, 54
+ br i1 %cmp23, label %while.cond, label %if.then.25
+
+if.then.25:
+ call void @g(i8* getelementptr inbounds ([100 x i8], [100 x i8]* @.str.4, i32 0, i32 0))
+ br label %while.cond
+
+while.cond:
+; CHECK-LABEL: while.cond
+; CHECK-NOT: phi i8* [ %0, %entry ], [ %Ptr, %while.body ], [ %0, %if.then.25 ]
+ %Ptr = phi i8* [ %incdec.ptr, %entry ], [ %incdec.ptr32, %while.body], [%incdec.ptr, %if.then.25 ]
+ %2 = load i8, i8* %Ptr
+ %and = and i8 %2, 64
+ %lnot = icmp eq i8 %and, 0
+ br i1 %lnot, label %while.body, label %while.cond.33
+
+while.body:
+ %incdec.ptr32 = getelementptr inbounds i8, i8* %Ptr, i32 1
+ br label %while.cond
+
+while.cond.33:
+ %incdec.ptr34 = getelementptr inbounds i8, i8* %Ptr, i32 1
+ br label %while.cond.57
+
+while.cond.57:
+ %3 = load i8, i8* %incdec.ptr34, align 1
+ %conv59 = zext i8 %3 to i32
+ %arrayidx61 = getelementptr inbounds i16, i16* %idx, i32 %conv59
+ %4 = load i16, i16* %arrayidx61, align 2
+ %and63 = and i16 %4, 2048
+ %tobool64 = icmp eq i16 %and63, 0
+ br i1 %tobool64, label %while.cond.73, label %while.cond.57
+
+while.cond.73:
+ br label %while.cond.73
+
+}
+
+declare void @g(i8*)
diff --git a/test/Transforms/InstCombine/icmp-range.ll b/test/Transforms/InstCombine/icmp-range.ll
index 041adf76b5e1..f035683170e1 100644
--- a/test/Transforms/InstCombine/icmp-range.ll
+++ b/test/Transforms/InstCombine/icmp-range.ll
@@ -54,8 +54,97 @@ define i1 @test_nonzero6(i8* %argw) {
ret i1 %rval
}
+; Constant not in range, should return true.
+define i1 @test_not_in_range(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_not_in_range
+; CHECK: ret i1 true
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp ne i32 %val, 6
+ ret i1 %rval
+}
+
+; Constant in range, can not fold.
+define i1 @test_in_range(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_in_range
+; CHECK: icmp ne i32 %val, 3
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp ne i32 %val, 3
+ ret i1 %rval
+}
+
+; Values in range greater than constant.
+define i1 @test_range_sgt_constant(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_range_sgt_constant
+; CHECK: ret i1 true
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp sgt i32 %val, 0
+ ret i1 %rval
+}
+
+; Values in range less than constant.
+define i1 @test_range_slt_constant(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_range_slt_constant
+; CHECK: ret i1 false
+ %val = load i32, i32* %arg, !range !0
+ %rval = icmp sgt i32 %val, 6
+ ret i1 %rval
+}
+
+; Values in union of multiple sub ranges not equal to constant.
+define i1 @test_multi_range1(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_multi_range1
+; CHECK: ret i1 true
+ %val = load i32, i32* %arg, !range !4
+ %rval = icmp ne i32 %val, 0
+ ret i1 %rval
+}
+
+; The constant is in none of the sub ranges, but it does lie inside the
+; single range spanning their union, so the compare is not folded. This
+; in theory could also be folded and might be implemented in
+; the future if shown profitable in practice.
+define i1 @test_multi_range2(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_multi_range2
+; CHECK: icmp ne i32 %val, 7
+ %val = load i32, i32* %arg, !range !4
+ %rval = icmp ne i32 %val, 7
+ ret i1 %rval
+}
+
+; Values' ranges overlap each other, so it can not be simplified.
+define i1 @test_two_ranges(i32* nocapture readonly %arg1, i32* nocapture readonly %arg2) {
+; CHECK-LABEL: test_two_ranges
+; CHECK: icmp ult i32 %val2, %val1
+ %val1 = load i32, i32* %arg1, !range !5
+ %val2 = load i32, i32* %arg2, !range !6
+ %rval = icmp ult i32 %val2, %val1
+ ret i1 %rval
+}
+
+; Values' ranges do not overlap each other, so it can be simplified to false.
+define i1 @test_two_ranges2(i32* nocapture readonly %arg1, i32* nocapture readonly %arg2) {
+; CHECK-LABEL: test_two_ranges2
+; CHECK: ret i1 false
+ %val1 = load i32, i32* %arg1, !range !0
+ %val2 = load i32, i32* %arg2, !range !6
+ %rval = icmp ult i32 %val2, %val1
+ ret i1 %rval
+}
+
+; Values' ranges do not overlap each other, so it can be simplified to true.
+define i1 @test_two_ranges3(i32* nocapture readonly %arg1, i32* nocapture readonly %arg2) {
+; CHECK-LABEL: test_two_ranges3
+; CHECK: ret i1 true
+ %val1 = load i32, i32* %arg1, !range !0
+ %val2 = load i32, i32* %arg2, !range !6
+ %rval = icmp ugt i32 %val2, %val1
+ ret i1 %rval
+}
!0 = !{i32 1, i32 6}
!1 = !{i32 0, i32 6}
!2 = !{i8 0, i8 1}
!3 = !{i8 0, i8 6}
+!4 = !{i32 1, i32 6, i32 8, i32 10}
+!5 = !{i32 5, i32 10}
+!6 = !{i32 8, i32 16}
diff --git a/test/Transforms/InstCombine/icmp-shr.ll b/test/Transforms/InstCombine/icmp-shr.ll
index 52414b99cca7..4fa85a72baf7 100644
--- a/test/Transforms/InstCombine/icmp-shr.ll
+++ b/test/Transforms/InstCombine/icmp-shr.ll
@@ -376,3 +376,12 @@ define i1 @PR21222(i32 %B) {
%cmp = icmp eq i32 %shr, -2
ret i1 %cmp
}
+
+; CHECK-LABEL: @PR24873(
+; CHECK: %[[icmp:.*]] = icmp ugt i64 %V, 61
+; CHECK-NEXT: ret i1 %[[icmp]]
+define i1 @PR24873(i64 %V) {
+ %ashr = ashr i64 -4611686018427387904, %V
+ %icmp = icmp eq i64 %ashr, -1
+ ret i1 %icmp
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index edcf76d5a7d2..7d6ec96b5328 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -819,8 +819,8 @@ define i1 @test68(i32 %x) nounwind uwtable {
; PR14708
; CHECK-LABEL: @test69(
-; CHECK: %1 = and i32 %c, -33
-; CHECK: %2 = icmp eq i32 %1, 65
+; CHECK: %1 = or i32 %c, 32
+; CHECK: %2 = icmp eq i32 %1, 97
; CHECK: ret i1 %2
define i1 @test69(i32 %c) nounwind uwtable {
%1 = icmp eq i32 %c, 97
@@ -1603,3 +1603,72 @@ define i32 @f7(i32 %a, i32 %b) {
%s = select i1 %cmp, i32 10000, i32 0
ret i32 %s
}
+
+; CHECK: @f8(
+; CHECK-NEXT: [[RESULT:%[a-z0-9]+]] = icmp ne i32 %lim, 0
+; CHECK-NEXT: ret i1 [[RESULT]]
+define i1 @f8(i32 %val, i32 %lim) {
+ %lim.sub = add i32 %lim, -1
+ %val.and = and i32 %val, %lim.sub
+ %r = icmp ult i32 %val.and, %lim
+ ret i1 %r
+}
+
+; CHECK: @f9(
+; CHECK-NEXT: [[RESULT:%[a-z0-9]+]] = icmp ne i32 %lim, 0
+; CHECK-NEXT: ret i1 [[RESULT]]
+define i1 @f9(i32 %val, i32 %lim) {
+ %lim.sub = sub i32 %lim, 1
+ %val.and = and i32 %val, %lim.sub
+ %r = icmp ult i32 %val.and, %lim
+ ret i1 %r
+}
+
+; CHECK: @f10(
+; CHECK: [[CMP:%.*]] = icmp uge i16 %p, mul (i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16), i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16))
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @f10(i16 %p) {
+entry:
+ %cmp580 = icmp ule i16 mul (i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16), i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16)), %p
+ ret i1 %cmp580
+}
+
+; CHECK-LABEL: @cmp_sgt_rhs_dec
+; CHECK-NOT: sub
+; CHECK: icmp sge i32 %conv, %i
+define i1 @cmp_sgt_rhs_dec(float %x, i32 %i) {
+ %conv = fptosi float %x to i32
+ %dec = sub nsw i32 %i, 1
+ %cmp = icmp sgt i32 %conv, %dec
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @cmp_sle_rhs_dec
+; CHECK-NOT: sub
+; CHECK: icmp slt i32 %conv, %i
+define i1 @cmp_sle_rhs_dec(float %x, i32 %i) {
+ %conv = fptosi float %x to i32
+ %dec = sub nsw i32 %i, 1
+ %cmp = icmp sle i32 %conv, %dec
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @cmp_sge_rhs_inc
+; CHECK-NOT: add
+; CHECK: icmp sgt i32 %conv, %i
+define i1 @cmp_sge_rhs_inc(float %x, i32 %i) {
+ %conv = fptosi float %x to i32
+ %inc = add nsw i32 %i, 1
+ %cmp = icmp sge i32 %conv, %inc
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @cmp_slt_rhs_inc
+; CHECK-NOT: add
+; CHECK: icmp sle i32 %conv, %i
+define i1 @cmp_slt_rhs_inc(float %x, i32 %i) {
+ %conv = fptosi float %x to i32
+ %inc = add nsw i32 %i, 1
+ %cmp = icmp slt i32 %conv, %inc
+ ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/inline-intrinsic-assert.ll b/test/Transforms/InstCombine/inline-intrinsic-assert.ll
index af34277563e0..c6446d43cffd 100644
--- a/test/Transforms/InstCombine/inline-intrinsic-assert.ll
+++ b/test/Transforms/InstCombine/inline-intrinsic-assert.ll
@@ -9,7 +9,7 @@ define float @foo(float %f1) #0 {
ret float %call
; CHECK-LABEL: @foo(
-; CHECK-NEXT: call float @llvm.fabs.f32
+; CHECK-NEXT: call fast float @llvm.fabs.f32
; CHECK-NEXT: ret float
}
diff --git a/test/Transforms/InstCombine/insert-extract-shuffle.ll b/test/Transforms/InstCombine/insert-extract-shuffle.ll
index 8929c82def7b..c75c771407e5 100644
--- a/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -24,14 +24,51 @@ define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) {
ret <4 x i16> %vec.3
}
-define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 {
+define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: @test_vcopyq_lane_p64
-; CHECK: extractelement
-; CHECK: insertelement
-; CHECK-NOT: shufflevector
-entry:
+; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <1 x i64> %b, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
+; CHECK-NEXT: shufflevector <2 x i64> %a, <2 x i64> %[[WIDEVEC]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT: ret <2 x i64> %res
%elt = extractelement <1 x i64> %b, i32 0
%res = insertelement <2 x i64> %a, i64 %elt, i32 1
ret <2 x i64> %res
}
+; PR2109: https://llvm.org/bugs/show_bug.cgi?id=2109
+
+define <4 x float> @widen_extract2(<4 x float> %ins, <2 x float> %ext) {
+; CHECK-LABEL: @widen_extract2(
+; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <2 x float> %ext, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: shufflevector <4 x float> %ins, <4 x float> %[[WIDEVEC]], <4 x i32> <i32 0, i32 4, i32 2, i32 5>
+; CHECK-NEXT: ret <4 x float> %i2
+ %e1 = extractelement <2 x float> %ext, i32 0
+ %e2 = extractelement <2 x float> %ext, i32 1
+ %i1 = insertelement <4 x float> %ins, float %e1, i32 1
+ %i2 = insertelement <4 x float> %i1, float %e2, i32 3
+ ret <4 x float> %i2
+}
+
+define <4 x float> @widen_extract3(<4 x float> %ins, <3 x float> %ext) {
+; CHECK-LABEL: @widen_extract3(
+; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <3 x float> %ext, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT: shufflevector <4 x float> %ins, <4 x float> %[[WIDEVEC]], <4 x i32> <i32 6, i32 5, i32 4, i32 3>
+; CHECK-NEXT: ret <4 x float> %i3
+ %e1 = extractelement <3 x float> %ext, i32 0
+ %e2 = extractelement <3 x float> %ext, i32 1
+ %e3 = extractelement <3 x float> %ext, i32 2
+ %i1 = insertelement <4 x float> %ins, float %e1, i32 2
+ %i2 = insertelement <4 x float> %i1, float %e2, i32 1
+ %i3 = insertelement <4 x float> %i2, float %e3, i32 0
+ ret <4 x float> %i3
+}
+
+define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {
+; CHECK-LABEL: @widen_extract4(
+; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <2 x float> %ext, <2 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: shufflevector <8 x float> %ins, <8 x float> %[[WIDEVEC]], <8 x i32> <i32 0, i32 1, i32 8, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <8 x float> %i1
+ %e1 = extractelement <2 x float> %ext, i32 0
+ %i1 = insertelement <8 x float> %ins, float %e1, i32 2
+ ret <8 x float> %i1
+}
+
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index bea063787a75..88f032498271 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -19,6 +19,11 @@ declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
declare double @llvm.cos.f64(double %Val) nounwind readonly
declare double @llvm.sin.f64(double %Val) nounwind readonly
+declare double @llvm.floor.f64(double %Val) nounwind readonly
+declare double @llvm.ceil.f64(double %Val) nounwind readonly
+declare double @llvm.trunc.f64(double %Val) nounwind readonly
+declare double @llvm.rint.f64(double %Val) nounwind readonly
+declare double @llvm.nearbyint.f64(double %Val) nounwind readonly
define i8 @uaddtest1(i8 %A, i8 %B) {
%x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
@@ -447,3 +452,63 @@ entry:
; CHECK-LABEL: @sin(
; CHECK: store volatile double 0.000000e+00, double* %P
}
+
+define void @floor(double *%P) {
+entry:
+ %B = tail call double @llvm.floor.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.floor.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+; CHECK-LABEL: @floor(
+; CHECK: store volatile double 1.000000e+00, double* %P, align 8
+; CHECK: store volatile double -2.000000e+00, double* %P, align 8
+}
+
+define void @ceil(double *%P) {
+entry:
+ %B = tail call double @llvm.ceil.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.ceil.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+; CHECK-LABEL: @ceil(
+; CHECK: store volatile double 2.000000e+00, double* %P, align 8
+; CHECK: store volatile double -1.000000e+00, double* %P, align 8
+}
+
+define void @trunc(double *%P) {
+entry:
+ %B = tail call double @llvm.trunc.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.trunc.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+; CHECK-LABEL: @trunc(
+; CHECK: store volatile double 1.000000e+00, double* %P, align 8
+; CHECK: store volatile double -1.000000e+00, double* %P, align 8
+}
+
+define void @rint(double *%P) {
+entry:
+ %B = tail call double @llvm.rint.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.rint.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+; CHECK-LABEL: @rint(
+; CHECK: store volatile double 2.000000e+00, double* %P, align 8
+; CHECK: store volatile double -2.000000e+00, double* %P, align 8
+}
+
+define void @nearbyint(double *%P) {
+entry:
+ %B = tail call double @llvm.nearbyint.f64(double 1.5) nounwind
+ store volatile double %B, double* %P
+ %C = tail call double @llvm.nearbyint.f64(double -1.5) nounwind
+ store volatile double %C, double* %P
+ ret void
+; CHECK-LABEL: @nearbyint(
+; CHECK: store volatile double 2.000000e+00, double* %P, align 8
+; CHECK: store volatile double -2.000000e+00, double* %P, align 8
+}
diff --git a/test/Transforms/InstCombine/lifetime.ll b/test/Transforms/InstCombine/lifetime.ll
new file mode 100644
index 000000000000..e5cbe3401410
--- /dev/null
+++ b/test/Transforms/InstCombine/lifetime.ll
@@ -0,0 +1,93 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @foo(i8* nocapture, i8* nocapture)
+
+define void @bar(i1 %flag) !dbg !4 {
+entry:
+; CHECK-LABEL: @bar(
+; CHECK: %[[T:[^ ]+]] = getelementptr inbounds [1 x i8], [1 x i8]* %text
+; CHECK: %[[B:[^ ]+]] = getelementptr inbounds [1 x i8], [1 x i8]* %buff
+; CHECK: if:
+; CHECK-NEXT: br label %bb2
+; CHECK: bb2:
+; CHECK-NEXT: br label %bb3
+; CHECK: bb3:
+; CHECK-NEXT: call void @llvm.dbg.declare
+; CHECK-NEXT: br label %fin
+; CHECK: call void @llvm.lifetime.start(i64 1, i8* %[[T]])
+; CHECK-NEXT: call void @llvm.lifetime.start(i64 1, i8* %[[B]])
+; CHECK-NEXT: call void @foo(i8* %[[B]], i8* %[[T]])
+; CHECK-NEXT: call void @llvm.lifetime.end(i64 1, i8* %[[B]])
+; CHECK-NEXT: call void @llvm.lifetime.end(i64 1, i8* %[[T]])
+ %text = alloca [1 x i8], align 1
+ %buff = alloca [1 x i8], align 1
+ %0 = getelementptr inbounds [1 x i8], [1 x i8]* %text, i64 0, i64 0
+ %1 = getelementptr inbounds [1 x i8], [1 x i8]* %buff, i64 0, i64 0
+ br i1 %flag, label %if, label %else
+
+if:
+ call void @llvm.lifetime.start(i64 1, i8* %0)
+ call void @llvm.lifetime.start(i64 1, i8* %1)
+ call void @llvm.lifetime.end(i64 1, i8* %1)
+ call void @llvm.lifetime.end(i64 1, i8* %0)
+ br label %bb2
+
+bb2:
+ call void @llvm.lifetime.start(i64 1, i8* %0)
+ call void @llvm.lifetime.start(i64 1, i8* %1)
+ call void @llvm.lifetime.end(i64 1, i8* %0)
+ call void @llvm.lifetime.end(i64 1, i8* %1)
+ br label %bb3
+
+bb3:
+ call void @llvm.lifetime.start(i64 1, i8* %0)
+ call void @llvm.dbg.declare(metadata [1 x i8]* %text, metadata !14, metadata !25), !dbg !26
+ call void @llvm.lifetime.end(i64 1, i8* %0)
+ br label %fin
+
+else:
+ call void @llvm.lifetime.start(i64 1, i8* %0)
+ call void @llvm.lifetime.start(i64 1, i8* %1)
+ call void @foo(i8* %1, i8* %0)
+ call void @llvm.lifetime.end(i64 1, i8* %1)
+ call void @llvm.lifetime.end(i64 1, i8* %0)
+ br label %fin
+
+fin:
+ ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22, !23}
+!llvm.ident = !{!24}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248826) (llvm/trunk 248827)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "test.cpp", directory: "/home/user")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "bar", linkageName: "bar", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7}
+!7 = !DIBasicType(name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
+!8 = !{!9, !11, !12, !14, !21}
+!9 = !DILocalVariable(name: "Size", arg: 1, scope: !4, file: !1, line: 2, type: !10)
+!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!11 = !DILocalVariable(name: "flag", arg: 2, scope: !4, file: !1, line: 2, type: !7)
+!12 = !DILocalVariable(name: "i", scope: !13, file: !1, line: 3, type: !10)
+!13 = distinct !DILexicalBlock(scope: !4, file: !1, line: 3, column: 3)
+!14 = !DILocalVariable(name: "text", scope: !15, file: !1, line: 4, type: !17)
+!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 3, column: 30)
+!16 = distinct !DILexicalBlock(scope: !13, file: !1, line: 3, column: 3)
+!17 = !DICompositeType(tag: DW_TAG_array_type, baseType: !18, size: 8, align: 8, elements: !19)
+!18 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!19 = !{!20}
+!20 = !DISubrange(count: 1)
+!21 = !DILocalVariable(name: "buff", scope: !15, file: !1, line: 5, type: !17)
+!22 = !{i32 2, !"Dwarf Version", i32 4}
+!23 = !{i32 2, !"Debug Info Version", i32 3}
+!24 = !{!"clang version 3.8.0 (trunk 248826) (llvm/trunk 248827)"}
+!25 = !DIExpression()
+!26 = !DILocation(line: 4, column: 10, scope: !15)
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index b0bfdc4c4c54..fe1bf1517539 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -148,8 +148,8 @@ define i1 @test8(i32 %X) {
%S = icmp eq i16 %R, 0
ret i1 %S
; CHECK-LABEL: @test8(
-; CHECK-NEXT: and i32 %X, -2
-; CHECK-NEXT: icmp eq i32 {{.*}}, 8
+; CHECK-NEXT: or i32 %X, 1
+; CHECK-NEXT: icmp eq i32 {{.*}}, 9
; CHECK-NEXT: ret i1
}
@@ -233,7 +233,8 @@ define i1 @test10_struct_arr(i32 %x) {
define i1 @test10_struct_arr_noinbounds(i32 %x) {
; CHECK-LABEL: @test10_struct_arr_noinbounds(
-; CHECK-NEXT %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
+; CHECK-NEXT: %r = icmp ne i32 %x, 1
+; CHECK-NEXT: ret i1 %r
%p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
%q = load i32, i32* %p
%r = icmp eq i32 %q, 9
diff --git a/test/Transforms/InstCombine/load-combine-metadata-2.ll b/test/Transforms/InstCombine/load-combine-metadata-2.ll
new file mode 100644
index 000000000000..bec0d7d2c36b
--- /dev/null
+++ b/test/Transforms/InstCombine/load-combine-metadata-2.ll
@@ -0,0 +1,20 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @test_load_load_combine_metadata(
+; Check that align metadata is combined
+; CHECK: load i32*, i32** %0
+; CHECK-SAME: !align ![[ALIGN:[0-9]+]]
+define void @test_load_load_combine_metadata(i32**, i32**, i32**) {
+ %a = load i32*, i32** %0, !align !0
+ %b = load i32*, i32** %0, !align !1
+ store i32 0, i32* %a
+ store i32 0, i32* %b
+ ret void
+}
+
+; CHECK: ![[ALIGN]] = !{i64 4}
+
+!0 = !{i64 4}
+!1 = !{i64 8} \ No newline at end of file
diff --git a/test/Transforms/InstCombine/load-combine-metadata-3.ll b/test/Transforms/InstCombine/load-combine-metadata-3.ll
new file mode 100644
index 000000000000..bad4bb240590
--- /dev/null
+++ b/test/Transforms/InstCombine/load-combine-metadata-3.ll
@@ -0,0 +1,20 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @test_load_load_combine_metadata(
+; Check that dereferenceable metadata is combined
+; CHECK: load i32*, i32** %0
+; CHECK-SAME: !dereferenceable ![[DEREF:[0-9]+]]
+define void @test_load_load_combine_metadata(i32**, i32**, i32**) {
+ %a = load i32*, i32** %0, !dereferenceable !0
+ %b = load i32*, i32** %0, !dereferenceable !1
+ store i32 0, i32* %a
+ store i32 0, i32* %b
+ ret void
+}
+
+; CHECK: ![[DEREF]] = !{i64 4}
+
+!0 = !{i64 4}
+!1 = !{i64 8} \ No newline at end of file
diff --git a/test/Transforms/InstCombine/load-combine-metadata-4.ll b/test/Transforms/InstCombine/load-combine-metadata-4.ll
new file mode 100644
index 000000000000..2a1ffcd0605e
--- /dev/null
+++ b/test/Transforms/InstCombine/load-combine-metadata-4.ll
@@ -0,0 +1,20 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @test_load_load_combine_metadata(
+; Check that dereferenceable_or_null metadata is combined
+; CHECK: load i32*, i32** %0
+; CHECK-SAME: !dereferenceable_or_null ![[DEREF:[0-9]+]]
+define void @test_load_load_combine_metadata(i32**, i32**, i32**) {
+ %a = load i32*, i32** %0, !dereferenceable_or_null !0
+ %b = load i32*, i32** %0, !dereferenceable_or_null !1
+ store i32 0, i32* %a
+ store i32 0, i32* %b
+ ret void
+}
+
+; CHECK: ![[DEREF]] = !{i64 4}
+
+!0 = !{i64 4}
+!1 = !{i64 8}
diff --git a/test/Transforms/InstCombine/load-combine-metadata.ll b/test/Transforms/InstCombine/load-combine-metadata.ll
index 9b9c1fe607b9..24b26fa42135 100644
--- a/test/Transforms/InstCombine/load-combine-metadata.ll
+++ b/test/Transforms/InstCombine/load-combine-metadata.ll
@@ -17,9 +17,9 @@ define void @test_load_load_combine_metadata(i32*, i32*, i32*) {
ret void
}
-; CHECK: ![[RANGE]] = !{i32 0, i32 1, i32 8, i32 9}
-!0 = !{ i32 0, i32 1 }
-!1 = !{ i32 8, i32 9 }
+; CHECK: ![[RANGE]] = !{i32 0, i32 5, i32 7, i32 9}
+!0 = !{ i32 0, i32 5 }
+!1 = !{ i32 7, i32 9 }
!2 = !{!2}
!3 = !{!3, !2}
!4 = !{!4, !2}
diff --git a/test/Transforms/InstCombine/loadstore-metadata.ll b/test/Transforms/InstCombine/loadstore-metadata.ll
index a30c0bc852ea..f72e36a7ea37 100644
--- a/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -31,7 +31,7 @@ define float @test_load_cast_combine_range(i32* %ptr) {
; CHECK-NOT: !range
; CHECK: ret float
entry:
- %l = load i32, i32* %ptr, !range !5
+ %l = load i32, i32* %ptr, !range !6
%c = bitcast i32 %l to float
ret float %c
}
@@ -57,6 +57,39 @@ entry:
ret i32 %c
}
+define i8* @test_load_cast_combine_align(i32** %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves align
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_align(
+; CHECK: load i8*, i8** %{{.*}}, !align !5
+entry:
+ %l = load i32*, i32** %ptr, !align !5
+ %c = bitcast i32* %l to i8*
+ ret i8* %c
+}
+
+define i8* @test_load_cast_combine_deref(i32** %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves dereferenceable
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_deref(
+; CHECK: load i8*, i8** %{{.*}}, !dereferenceable !5
+entry:
+ %l = load i32*, i32** %ptr, !dereferenceable !5
+ %c = bitcast i32* %l to i8*
+ ret i8* %c
+}
+
+define i8* @test_load_cast_combine_deref_or_null(i32** %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves
+; dereferenceable_or_null metadata.
+; CHECK-LABEL: @test_load_cast_combine_deref_or_null(
+; CHECK: load i8*, i8** %{{.*}}, !dereferenceable_or_null !5
+entry:
+ %l = load i32*, i32** %ptr, !dereferenceable_or_null !5
+ %c = bitcast i32* %l to i8*
+ ret i8* %c
+}
+
define void @test_load_cast_combine_loop(float* %src, i32* %dst, i32 %n) {
; Ensure (cast (load (...))) -> (load (cast (...))) preserves loop access
; metadata.
@@ -110,4 +143,5 @@ entry:
!2 = !{ !2, !1 }
!3 = !{ }
!4 = !{ i32 1 }
-!5 = !{ i32 0, i32 42 }
+!5 = !{ i64 8 }
+!6 = !{ i32 0, i32 42 }
diff --git a/test/Transforms/InstCombine/log-pow-nofastmath.ll b/test/Transforms/InstCombine/log-pow-nofastmath.ll
new file mode 100644
index 000000000000..faaef97311ec
--- /dev/null
+++ b/test/Transforms/InstCombine/log-pow-nofastmath.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mylog(double %x, double %y) {
+entry:
+ %pow = call double @llvm.pow.f64(double %x, double %y)
+ %call = call double @log(double %pow)
+ ret double %call
+}
+
+; CHECK-LABEL: define double @mylog(
+; CHECK: %pow = call double @llvm.pow.f64(double %x, double %y)
+; CHECK: %call = call double @log(double %pow)
+; CHECK: ret double %call
+; CHECK: }
+
+define double @test3(double %x) {
+ %call2 = call double @exp2(double %x)
+ %call3 = call double @log(double %call2)
+ ret double %call3
+}
+
+; CHECK-LABEL: @test3
+; CHECK: %call2 = call double @exp2(double %x)
+; CHECK: %call3 = call double @log(double %call2)
+; CHECK: ret double %call3
+; CHECK: }
+
+declare double @log(double)
+declare double @exp2(double)
+declare double @llvm.pow.f64(double, double)
diff --git a/test/Transforms/InstCombine/log-pow.ll b/test/Transforms/InstCombine/log-pow.ll
new file mode 100644
index 000000000000..c5ca1688d34a
--- /dev/null
+++ b/test/Transforms/InstCombine/log-pow.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mylog(double %x, double %y) #0 {
+entry:
+ %pow = call double @llvm.pow.f64(double %x, double %y)
+ %call = call double @log(double %pow) #0
+ ret double %call
+}
+
+; CHECK-LABEL: define double @mylog(
+; CHECK: %log = call fast double @log(double %x) #0
+; CHECK: %mul = fmul fast double %log, %y
+; CHECK: ret double %mul
+; CHECK: }
+
+define double @test2(double ()* %fptr, double %p1) #0 {
+ %call1 = call double %fptr()
+ %pow = call double @log(double %call1)
+ ret double %pow
+}
+
+; CHECK-LABEL: @test2
+; CHECK: log
+
+define double @test3(double %x) #0 {
+ %call2 = call double @exp2(double %x) #0
+ %call3 = call double @log(double %call2) #0
+ ret double %call3
+}
+
+; CHECK-LABEL: @test3
+; CHECK: %call2 = call double @exp2(double %x) #0
+; CHECK: %logmul = fmul fast double %x, 0x3FE62E42FEFA39EF
+; CHECK: ret double %logmul
+; CHECK: }
+
+declare double @log(double) #0
+declare double @exp2(double) #0
+declare double @llvm.pow.f64(double, double)
+
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index 138001ace951..8fcb8214360d 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -186,3 +186,14 @@ define void @test8() {
call void @_ZdaPvj(i8* %naj, i32 32) builtin
ret void
}
+
+declare noalias i8* @"\01??2@YAPEAX_K@Z"(i64) nobuiltin
+declare void @"\01??3@YAXPEAX@Z"(i8*) nobuiltin
+
+; CHECK-LABEL: @test9(
+define void @test9() {
+ ; CHECK-NOT: call
+ %new_long_long = call noalias i8* @"\01??2@YAPEAX_K@Z"(i64 32) builtin
+ call void @"\01??3@YAXPEAX@Z"(i8* %new_long_long) builtin
+ ret void
+}
diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll
index db15bd66b715..f9ff479e3add 100644
--- a/test/Transforms/InstCombine/memcmp-1.ll
+++ b/test/Transforms/InstCombine/memcmp-1.ll
@@ -2,7 +2,7 @@
;
; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32:64"
@foo = constant [4 x i8] c"foo\00"
@hel = constant [4 x i8] c"hel\00"
@@ -70,3 +70,54 @@ define i32 @test_simplify6() {
ret i32 %ret
; CHECK: ret i32 -1
}
+
+; Check memcmp(mem1, mem2, 8)==0 -> *(int64_t*)mem1 == *(int64_t*)mem2
+
+define i1 @test_simplify7(i64 %x, i64 %y) {
+; CHECK-LABEL: @test_simplify7(
+ %x.addr = alloca i64, align 8
+ %y.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ store i64 %y, i64* %y.addr, align 8
+ %xptr = bitcast i64* %x.addr to i8*
+ %yptr = bitcast i64* %y.addr to i8*
+ %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 8)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+; CHECK: %cmp = icmp eq i64 %x, %y
+; CHECK: ret i1 %cmp
+}
+
+; Check memcmp(mem1, mem2, 4)==0 -> *(int32_t*)mem1 == *(int32_t*)mem2
+
+define i1 @test_simplify8(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_simplify8(
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %xptr = bitcast i32* %x.addr to i8*
+ %yptr = bitcast i32* %y.addr to i8*
+ %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 4)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+; CHECK: %cmp = icmp eq i32 %x, %y
+; CHECK: ret i1 %cmp
+}
+
+; Check memcmp(mem1, mem2, 2)==0 -> *(int16_t*)mem1 == *(int16_t*)mem2
+
+define i1 @test_simplify9(i16 %x, i16 %y) {
+; CHECK-LABEL: @test_simplify9(
+ %x.addr = alloca i16, align 2
+ %y.addr = alloca i16, align 2
+ store i16 %x, i16* %x.addr, align 2
+ store i16 %y, i16* %y.addr, align 2
+ %xptr = bitcast i16* %x.addr to i8*
+ %yptr = bitcast i16* %y.addr to i8*
+ %call = call i32 @memcmp(i8* %xptr, i8* %yptr, i32 2)
+ %cmp = icmp eq i32 %call, 0
+ ret i1 %cmp
+; CHECK: %cmp = icmp eq i16 %x, %y
+; CHECK: ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/memset_chk-1.ll b/test/Transforms/InstCombine/memset_chk-1.ll
index 27f7293a6bce..56ea14c8292d 100644
--- a/test/Transforms/InstCombine/memset_chk-1.ll
+++ b/test/Transforms/InstCombine/memset_chk-1.ll
@@ -63,4 +63,30 @@ define i8* @test_no_simplify2() {
ret i8* %ret
}
+; Test that RAUW in SimplifyLibCalls for __memset_chk generates valid IR
+define i32 @test_rauw(i8* %a, i8* %b, i8** %c) {
+; CHECK-LABEL: test_rauw
+entry:
+ %call49 = call i64 @strlen(i8* %a)
+ %add180 = add i64 %call49, 1
+ %yo107 = call i64 @llvm.objectsize.i64.p0i8(i8* %b, i1 false)
+ %call50 = call i8* @__memmove_chk(i8* %b, i8* %a, i64 %add180, i64 %yo107)
+; CHECK: %strlen = call i64 @strlen(i8* %b)
+; CHECK-NEXT: %strchr2 = getelementptr i8, i8* %b, i64 %strlen
+ %call51i = call i8* @strrchr(i8* %b, i32 0)
+ %d = load i8*, i8** %c, align 8
+ %sub182 = ptrtoint i8* %d to i64
+ %sub183 = ptrtoint i8* %b to i64
+ %sub184 = sub i64 %sub182, %sub183
+ %add52.i.i = add nsw i64 %sub184, 1
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %strchr2
+ %call185 = call i8* @__memset_chk(i8* %call51i, i32 0, i64 %add52.i.i, i64 -1)
+ ret i32 4
+}
+
+declare i8* @__memmove_chk(i8*, i8*, i64, i64)
+declare i8* @strrchr(i8*, i32)
+declare i64 @strlen(i8* nocapture)
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
+
declare i8* @__memset_chk(i8*, i32, i64, i64)
diff --git a/test/Transforms/InstCombine/minmax-fp.ll b/test/Transforms/InstCombine/minmax-fp.ll
new file mode 100644
index 000000000000..b90afe3405f7
--- /dev/null
+++ b/test/Transforms/InstCombine/minmax-fp.ll
@@ -0,0 +1,156 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; CHECK-LABEL: @t1
+; CHECK-NEXT: fcmp oge float %a, 5.000000e+00
+; CHECK-NEXT: select i1 %.inv, float 5.000000e+00, float %a
+; CHECK-NEXT: fpext float %1 to double
+define double @t1(float %a) {
+ ; This is the canonical form for a type-changing min/max.
+ %1 = fcmp ult float %a, 5.0
+ %2 = select i1 %1, float %a, float 5.0
+ %3 = fpext float %2 to double
+ ret double %3
+}
+
+; CHECK-LABEL: @t2
+; CHECK-NEXT: fcmp oge float %a, 5.000000e+00
+; CHECK-NEXT: select i1 %.inv, float 5.000000e+00, float %a
+; CHECK-NEXT: fpext float %1 to double
+define double @t2(float %a) {
+ ; Check this is converted into canonical form, as above.
+ %1 = fcmp ult float %a, 5.0
+ %2 = fpext float %a to double
+ %3 = select i1 %1, double %2, double 5.0
+ ret double %3
+}
+
+; CHECK-LABEL: @t4
+; CHECK-NEXT: fcmp oge double %a, 5.000000e+00
+; CHECK-NEXT: select i1 %.inv, double 5.000000e+00, double %a
+; CHECK-NEXT: fptrunc double %1 to float
+define float @t4(double %a) {
+ ; Same again, with trunc.
+ %1 = fcmp ult double %a, 5.0
+ %2 = fptrunc double %a to float
+ %3 = select i1 %1, float %2, float 5.0
+ ret float %3
+}
+
+; CHECK-LABEL: @t5
+; CHECK-NEXT: fcmp ult float %a, 5.000000e+00
+; CHECK-NEXT: fpext float %a to double
+; CHECK-NEXT: select i1 %1, double %2, double 5.001
+define double @t5(float %a) {
+ ; different values, should not be converted.
+ %1 = fcmp ult float %a, 5.0
+ %2 = fpext float %a to double
+ %3 = select i1 %1, double %2, double 5.001
+ ret double %3
+}
+
+; CHECK-LABEL: @t6
+; CHECK-NEXT: fcmp ult float %a, -0.0
+; CHECK-NEXT: fpext float %a to double
+; CHECK-NEXT: select i1 %1, double %2, double 0.0
+define double @t6(float %a) {
+ ; Signed zero, should not be converted
+ %1 = fcmp ult float %a, -0.0
+ %2 = fpext float %a to double
+ %3 = select i1 %1, double %2, double 0.0
+ ret double %3
+}
+
+; CHECK-LABEL: @t7
+; CHECK-NEXT: fcmp ult float %a, 0.0
+; CHECK-NEXT: fpext float %a to double
+; CHECK-NEXT: select i1 %1, double %2, double -0.0
+define double @t7(float %a) {
+ ; Signed zero, should not be converted
+ %1 = fcmp ult float %a, 0.0
+ %2 = fpext float %a to double
+ %3 = select i1 %1, double %2, double -0.0
+ ret double %3
+}
+
+; CHECK-LABEL: @t8
+; CHECK-NEXT: fcmp oge float %a, 5.000000e+00
+; CHECK-NEXT: select i1 %.inv, float 5.000000e+00, float %a
+; CHECK-NEXT: fptoui float %1 to i64
+define i64 @t8(float %a) {
+ %1 = fcmp ult float %a, 5.0
+ %2 = fptoui float %a to i64
+ %3 = select i1 %1, i64 %2, i64 5
+ ret i64 %3
+}
+
+; CHECK-LABEL: @t9
+; CHECK-NEXT: fcmp oge float %a, 0.000000e+00
+; CHECK-NEXT: select i1 %.inv, float 0.000000e+00, float %a
+; CHECK-NEXT: fptosi float %1 to i8
+define i8 @t9(float %a) {
+ %1 = fcmp ult float %a, 0.0
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 %2, i8 0
+ ret i8 %3
+}
+
+; CHECK-LABEL: @t11
+; CHECK-NEXT: fcmp fast oge float %b, %a
+; CHECK-NEXT: select i1 %.inv, float %a, float %b
+; CHECK-NEXT: fptosi
+define i8 @t11(float %a, float %b) {
+ ; Either operand could be NaN, but fast modifier applied.
+ %1 = fcmp fast ult float %b, %a
+ %2 = fptosi float %a to i8
+ %3 = fptosi float %b to i8
+ %4 = select i1 %1, i8 %3, i8 %2
+ ret i8 %4
+}
+
+; CHECK-LABEL: @t12
+; CHECK-NEXT: fcmp nnan oge float %b, %a
+; CHECK-NEXT: select i1 %.inv, float %a, float %b
+; CHECK-NEXT: fptosi float %.v to i8
+define i8 @t12(float %a, float %b) {
+ ; Either operand could be NaN, but nnan modifier applied.
+ %1 = fcmp nnan ult float %b, %a
+ %2 = fptosi float %a to i8
+ %3 = fptosi float %b to i8
+ %4 = select i1 %1, i8 %3, i8 %2
+ ret i8 %4
+}
+
+; CHECK-LABEL: @t13
+; CHECK-NEXT: fcmp ult float %a, 1.500000e+00
+; CHECK-NEXT: fptosi float %a to i8
+; CHECK-NEXT: select i1 %1, i8 %2, i8 1
+define i8 @t13(float %a) {
+ ; Float and int values do not match.
+ %1 = fcmp ult float %a, 1.5
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 %2, i8 1
+ ret i8 %3
+}
+
+; CHECK-LABEL: @t14
+; CHECK-NEXT: fcmp ule float %a, 0.000000e+00
+; CHECK-NEXT: fptosi float %a to i8
+; CHECK-NEXT: select i1 %1, i8 %2, i8 0
+define i8 @t14(float %a) {
+ ; <= comparison, where %a could be -0.0. Not safe.
+ %1 = fcmp ule float %a, 0.0
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 %2, i8 0
+ ret i8 %3
+}
+
+; CHECK-LABEL: @t15
+; CHECK-NEXT: fcmp nsz oge float %a, 0.000000e+00
+; CHECK-NEXT: select i1 %.inv, float 0.000000e+00, float %a
+; CHECK-NEXT: fptosi float %1 to i8
+define i8 @t15(float %a) {
+ %1 = fcmp nsz ule float %a, 0.0
+ %2 = fptosi float %a to i8
+ %3 = select i1 %1, i8 %2, i8 0
+ ret i8 %3
+}
diff --git a/test/Transforms/InstCombine/neon-intrinsics.ll b/test/Transforms/InstCombine/neon-intrinsics.ll
index 3ad09cc6c694..d22fa9c811dc 100644
--- a/test/Transforms/InstCombine/neon-intrinsics.ll
+++ b/test/Transforms/InstCombine/neon-intrinsics.ll
@@ -3,8 +3,8 @@
; The alignment arguments for NEON load/store intrinsics can be increased
; by instcombine. Check for this.
-; CHECK: vld4.v2i32({{.*}}, i32 32)
-; CHECK: vst4.v2i32({{.*}}, i32 16)
+; CHECK: vld4.v2i32.p0i8({{.*}}, i32 32)
+; CHECK: vst4.p0i8.v2i32({{.*}}, i32 16)
@x = common global [8 x i32] zeroinitializer, align 32
@y = common global [8 x i32] zeroinitializer, align 16
@@ -12,14 +12,14 @@
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
define void @test() nounwind ssp {
- %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* bitcast ([8 x i32]* @x to i8*), i32 1)
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8* bitcast ([8 x i32]* @x to i8*), i32 1)
%tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 1
%tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
%tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 3
- call void @llvm.arm.neon.vst4.v2i32(i8* bitcast ([8 x i32]* @y to i8*), <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1)
+ call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* bitcast ([8 x i32]* @y to i8*), <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1)
ret void
}
-declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst4.p0i8.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
diff --git a/test/Transforms/InstCombine/no_cgscc_assert.ll b/test/Transforms/InstCombine/no_cgscc_assert.ll
index cec5297695b1..3df04d2c8902 100644
--- a/test/Transforms/InstCombine/no_cgscc_assert.ll
+++ b/test/Transforms/InstCombine/no_cgscc_assert.ll
@@ -10,7 +10,7 @@ define float @bar(float %f) #0 {
ret float %call1
; CHECK-LABEL: @bar(
-; CHECK-NEXT: call float @llvm.fabs.f32
+; CHECK-NEXT: call fast float @llvm.fabs.f32
; CHECK-NEXT: ret float
}
diff --git a/test/Transforms/InstCombine/nonnull-attribute.ll b/test/Transforms/InstCombine/nonnull-attribute.ll
new file mode 100644
index 000000000000..74fb09114927
--- /dev/null
+++ b/test/Transforms/InstCombine/nonnull-attribute.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This test makes sure that we do not assume globals in address spaces other
+; than 0 are able to be null.
+
+@as0 = external global i32
+@as1 = external addrspace(1) global i32
+
+declare void @addrspace0(i32*)
+declare void @addrspace1(i32 addrspace(1)*)
+
+; CHECK: call void @addrspace0(i32* nonnull @as0)
+; CHECK: call void @addrspace1(i32 addrspace(1)* @as1)
+
+define void @test() {
+ call void @addrspace0(i32* @as0)
+ call void @addrspace1(i32 addrspace(1)* @as1)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/not.ll b/test/Transforms/InstCombine/not.ll
index 9d59edd7934d..edb402a125ac 100644
--- a/test/Transforms/InstCombine/not.ll
+++ b/test/Transforms/InstCombine/not.ll
@@ -5,51 +5,51 @@
; CHECK-NOT: xor
define i32 @test1(i32 %A) {
- %B = xor i32 %A, -1 ; <i32> [#uses=1]
- %C = xor i32 %B, -1 ; <i32> [#uses=1]
+ %B = xor i32 %A, -1
+ %C = xor i32 %B, -1
ret i32 %C
}
define i1 @test2(i32 %A, i32 %B) {
; Can change into setge
- %cond = icmp sle i32 %A, %B ; <i1> [#uses=1]
- %Ret = xor i1 %cond, true ; <i1> [#uses=1]
+ %cond = icmp sle i32 %A, %B
+ %Ret = xor i1 %cond, true
ret i1 %Ret
}
-; Test that demorgans law can be instcombined
+; Test that De Morgan's law can be instcombined.
define i32 @test3(i32 %A, i32 %B) {
- %a = xor i32 %A, -1 ; <i32> [#uses=1]
- %b = xor i32 %B, -1 ; <i32> [#uses=1]
- %c = and i32 %a, %b ; <i32> [#uses=1]
- %d = xor i32 %c, -1 ; <i32> [#uses=1]
+ %a = xor i32 %A, -1
+ %b = xor i32 %B, -1
+ %c = and i32 %a, %b
+ %d = xor i32 %c, -1
ret i32 %d
}
-; Test that demorgens law can work with constants
+; Test that De Morgan's law can work with constants.
define i32 @test4(i32 %A, i32 %B) {
- %a = xor i32 %A, -1 ; <i32> [#uses=1]
- %c = and i32 %a, 5 ; <i32> [#uses=1]
- %d = xor i32 %c, -1 ; <i32> [#uses=1]
+ %a = xor i32 %A, -1
+ %c = and i32 %a, 5
+ %d = xor i32 %c, -1
ret i32 %d
}
-; test the mirror of demorgans law...
+; Test the mirror of De Morgan's law.
define i32 @test5(i32 %A, i32 %B) {
- %a = xor i32 %A, -1 ; <i32> [#uses=1]
- %b = xor i32 %B, -1 ; <i32> [#uses=1]
- %c = or i32 %a, %b ; <i32> [#uses=1]
- %d = xor i32 %c, -1 ; <i32> [#uses=1]
+ %a = xor i32 %A, -1
+ %b = xor i32 %B, -1
+ %c = or i32 %a, %b
+ %d = xor i32 %c, -1
ret i32 %d
}
; PR2298
-define zeroext i8 @test6(i32 %a, i32 %b) nounwind {
+define zeroext i8 @test6(i32 %a, i32 %b) {
entry:
- %tmp1not = xor i32 %a, -1 ; <i32> [#uses=1]
- %tmp2not = xor i32 %b, -1 ; <i32> [#uses=1]
- %tmp3 = icmp slt i32 %tmp1not, %tmp2not ; <i1> [#uses=1]
- %retval67 = zext i1 %tmp3 to i8 ; <i8> [#uses=1]
+ %tmp1not = xor i32 %a, -1
+ %tmp2not = xor i32 %b, -1
+ %tmp3 = icmp slt i32 %tmp1not, %tmp2not
+ %retval67 = zext i1 %tmp3 to i8
ret i8 %retval67
}
@@ -58,3 +58,4 @@ define <2 x i1> @test7(<2 x i32> %A, <2 x i32> %B) {
%Ret = xor <2 x i1> %cond, <i1 true, i1 true>
ret <2 x i1> %Ret
}
+
diff --git a/test/Transforms/InstCombine/objsize-address-space.ll b/test/Transforms/InstCombine/objsize-address-space.ll
index 6046dad89790..ab4b64dfbf07 100644
--- a/test/Transforms/InstCombine/objsize-address-space.ll
+++ b/test/Transforms/InstCombine/objsize-address-space.ll
@@ -32,7 +32,7 @@ define i16 @foo_as3_i16() nounwind {
ret i16 %1
}
-@a_alias = weak alias [60 x i8] addrspace(3)* @a_as3
+@a_alias = weak alias [60 x i8], [60 x i8] addrspace(3)* @a_as3
define i32 @foo_alias() nounwind {
%1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8], [60 x i8] addrspace(3)* @a_alias, i32 0, i32 0), i1 false)
ret i32 %1
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 335a816e9ece..2af391f907cc 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -219,7 +219,7 @@ define i32 @test13(i8** %esc) {
ret i32 %1
}
-@globalalias = internal alias [60 x i8]* @a
+@globalalias = internal alias [60 x i8], [60 x i8]* @a
; CHECK-LABEL: @test18(
; CHECK-NEXT: ret i32 60
@@ -229,7 +229,7 @@ define i32 @test18() {
ret i32 %1
}
-@globalalias2 = weak alias [60 x i8]* @a
+@globalalias2 = weak alias [60 x i8], [60 x i8]* @a
; CHECK-LABEL: @test19(
; CHECK: llvm.objectsize
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index b91a5954d97e..a2bc4e7d9832 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -182,7 +182,7 @@ define i1 @test19(i32 %A) {
%D = or i1 %B, %C
ret i1 %D
; CHECK-LABEL: @test19(
-; CHECK: and i32
+; CHECK: or i32
; CHECK: icmp eq
; CHECK: ret i1
}
diff --git a/test/Transforms/InstCombine/phi-load-metadata-2.ll b/test/Transforms/InstCombine/phi-load-metadata-2.ll
new file mode 100644
index 000000000000..cfbf2dea8a7a
--- /dev/null
+++ b/test/Transforms/InstCombine/phi-load-metadata-2.ll
@@ -0,0 +1,30 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @bar()
+declare void @baz()
+
+; Check that dereferenceable metadata is combined
+; CHECK-LABEL: cont:
+; CHECK: load i32*, i32**
+; CHECK-SAME: !dereferenceable ![[DEREF:[0-9]+]]
+define i32* @test_phi_combine_load_metadata(i1 %c, i32** dereferenceable(8) %p1, i32** dereferenceable(8) %p2) {
+ br i1 %c, label %t, label %f
+t:
+ call void @bar()
+ %v1 = load i32*, i32** %p1, align 8, !dereferenceable !0
+ br label %cont
+
+f:
+ call void @baz()
+ %v2 = load i32*, i32** %p2, align 8, !dereferenceable !1
+ br label %cont
+
+cont:
+ %res = phi i32* [ %v1, %t ], [ %v2, %f ]
+ ret i32* %res
+}
+
+; CHECK: ![[DEREF]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/test/Transforms/InstCombine/phi-load-metadata-3.ll b/test/Transforms/InstCombine/phi-load-metadata-3.ll
new file mode 100644
index 000000000000..39049c9c7181
--- /dev/null
+++ b/test/Transforms/InstCombine/phi-load-metadata-3.ll
@@ -0,0 +1,30 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @bar()
+declare void @baz()
+
+; Check that dereferenceable_or_null metadata is combined
+; CHECK-LABEL: cont:
+; CHECK: load i32*, i32**
+; CHECK-SAME: !dereferenceable_or_null ![[DEREF:[0-9]+]]
+define i32* @test_phi_combine_load_metadata(i1 %c, i32** dereferenceable(8) %p1, i32** dereferenceable(8) %p2) {
+ br i1 %c, label %t, label %f
+t:
+ call void @bar()
+ %v1 = load i32*, i32** %p1, align 8, !dereferenceable_or_null !0
+ br label %cont
+
+f:
+ call void @baz()
+ %v2 = load i32*, i32** %p2, align 8, !dereferenceable_or_null !1
+ br label %cont
+
+cont:
+ %res = phi i32* [ %v1, %t ], [ %v2, %f ]
+ ret i32* %res
+}
+
+; CHECK: ![[DEREF]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/test/Transforms/InstCombine/phi-load-metadata.ll b/test/Transforms/InstCombine/phi-load-metadata.ll
new file mode 100644
index 000000000000..004a355ca441
--- /dev/null
+++ b/test/Transforms/InstCombine/phi-load-metadata.ll
@@ -0,0 +1,30 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare void @bar()
+declare void @baz()
+
+; Check that align metadata is combined
+; CHECK-LABEL: cont:
+; CHECK: load i32*, i32**
+; CHECK-SAME: !align ![[ALIGN:[0-9]+]]
+define i32* @test_phi_combine_load_metadata(i1 %c, i32** dereferenceable(8) %p1, i32** dereferenceable(8) %p2) {
+ br i1 %c, label %t, label %f
+t:
+ call void @bar()
+ %v1 = load i32*, i32** %p1, align 8, !align !0
+ br label %cont
+
+f:
+ call void @baz()
+ %v2 = load i32*, i32** %p2, align 8, !align !1
+ br label %cont
+
+cont:
+ %res = phi i32* [ %v1, %t ], [ %v2, %f ]
+ ret i32* %res
+}
+
+; CHECK: ![[ALIGN]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index 54cc4cfe4594..d0441d76d399 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -630,3 +630,133 @@ done:
%y = phi i32 [ undef, %entry ]
ret i32 %y
}
+
+; We should be able to fold the zexts to the other side of the phi
+; even though there's a constant value input to the phi. This is
+; because we can shrink that constant to the smaller phi type.
+
+define i1 @PR24766(i8 %x1, i8 %x2, i8 %condition) {
+entry:
+ %conv = sext i8 %condition to i32
+ switch i32 %conv, label %epilog [
+ i32 0, label %sw1
+ i32 1, label %sw2
+ ]
+
+sw1:
+ %cmp1 = icmp eq i8 %x1, %x2
+ %frombool1 = zext i1 %cmp1 to i8
+ br label %epilog
+
+sw2:
+ %cmp2 = icmp sle i8 %x1, %x2
+ %frombool2 = zext i1 %cmp2 to i8
+ br label %epilog
+
+epilog:
+ %conditionMet = phi i8 [ 0, %entry ], [ %frombool2, %sw2 ], [ %frombool1, %sw1 ]
+ %tobool = icmp ne i8 %conditionMet, 0
+ ret i1 %tobool
+
+; CHECK-LABEL: @PR24766(
+; CHECK: %[[RES:.*]] = phi i1 [ false, %entry ], [ %cmp2, %sw2 ], [ %cmp1, %sw1 ]
+; CHECK-NEXT: ret i1 %[[RES]]
+}
+
+; Same as above (a phi with more than 2 operands), but no constants
+
+define i1 @PR24766_no_constants(i8 %x1, i8 %x2, i8 %condition, i1 %another_condition) {
+entry:
+ %frombool0 = zext i1 %another_condition to i8
+ %conv = sext i8 %condition to i32
+ switch i32 %conv, label %epilog [
+ i32 0, label %sw1
+ i32 1, label %sw2
+ ]
+
+sw1:
+ %cmp1 = icmp eq i8 %x1, %x2
+ %frombool1 = zext i1 %cmp1 to i8
+ br label %epilog
+
+sw2:
+ %cmp2 = icmp sle i8 %x1, %x2
+ %frombool2 = zext i1 %cmp2 to i8
+ br label %epilog
+
+epilog:
+ %conditionMet = phi i8 [ %frombool0, %entry ], [ %frombool2, %sw2 ], [ %frombool1, %sw1 ]
+ %tobool = icmp ne i8 %conditionMet, 0
+ ret i1 %tobool
+
+; CHECK-LABEL: @PR24766_no_constants(
+; CHECK: %[[RES:.*]] = phi i1 [ %another_condition, %entry ], [ %cmp2, %sw2 ], [ %cmp1, %sw1 ]
+; CHECK-NEXT: ret i1 %[[RES]]
+}
+
+; Same as above (a phi with more than 2 operands), but two constants
+
+define i1 @PR24766_two_constants(i8 %x1, i8 %x2, i8 %condition) {
+entry:
+ %conv = sext i8 %condition to i32
+ switch i32 %conv, label %epilog [
+ i32 0, label %sw1
+ i32 1, label %sw2
+ ]
+
+sw1:
+ %cmp1 = icmp eq i8 %x1, %x2
+ %frombool1 = zext i1 %cmp1 to i8
+ br label %epilog
+
+sw2:
+ %cmp2 = icmp sle i8 %x1, %x2
+ %frombool2 = zext i1 %cmp2 to i8
+ br label %epilog
+
+epilog:
+ %conditionMet = phi i8 [ 0, %entry ], [ 1, %sw2 ], [ %frombool1, %sw1 ]
+ %tobool = icmp ne i8 %conditionMet, 0
+ ret i1 %tobool
+
+; CHECK-LABEL: @PR24766_two_constants(
+; CHECK: %[[RES:.*]] = phi i1 [ false, %entry ], [ true, %sw2 ], [ %cmp1, %sw1 ]
+; CHECK-NEXT: ret i1 %[[RES]]
+}
+
+; Same as above (a phi with more than 2 operands), but two constants and two variables
+
+define i1 @PR24766_two_constants_two_var(i8 %x1, i8 %x2, i8 %condition) {
+entry:
+ %conv = sext i8 %condition to i32
+ switch i32 %conv, label %epilog [
+ i32 0, label %sw1
+ i32 1, label %sw2
+ i32 2, label %sw3
+ ]
+
+sw1:
+ %cmp1 = icmp eq i8 %x1, %x2
+ %frombool1 = zext i1 %cmp1 to i8
+ br label %epilog
+
+sw2:
+ %cmp2 = icmp sle i8 %x1, %x2
+ %frombool2 = zext i1 %cmp2 to i8
+ br label %epilog
+
+sw3:
+ %cmp3 = icmp sge i8 %x1, %x2
+ %frombool3 = zext i1 %cmp3 to i8
+ br label %epilog
+
+epilog:
+ %conditionMet = phi i8 [ 0, %entry ], [ %frombool2, %sw2 ], [ %frombool1, %sw1 ], [ 1, %sw3 ]
+ %tobool = icmp ne i8 %conditionMet, 0
+ ret i1 %tobool
+
+; CHECK-LABEL: @PR24766_two_constants_two_var(
+; CHECK: %[[RES:.*]] = phi i1 [ false, %entry ], [ %cmp2, %sw2 ], [ %cmp1, %sw1 ], [ true, %sw3 ]
+; CHECK-NEXT: ret i1 %[[RES]]
+}
+
diff --git a/test/Transforms/InstCombine/pow-1.ll b/test/Transforms/InstCombine/pow-1.ll
index fb3b7d796160..f2b56fd33d64 100644
--- a/test/Transforms/InstCombine/pow-1.ll
+++ b/test/Transforms/InstCombine/pow-1.ll
@@ -6,6 +6,8 @@
; RUN: opt -instcombine -S < %s -mtriple=x86_64-apple-macosx10.8 | FileCheck %s --check-prefix=CHECK-NO-EXP10
; RUN: opt -instcombine -S < %s -mtriple=arm-apple-ios6.0 | FileCheck %s --check-prefix=CHECK-NO-EXP10
; RUN: opt -instcombine -S < %s -mtriple=x86_64-netbsd | FileCheck %s --check-prefix=CHECK-NO-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-tvos9.0 | FileCheck %s --check-prefix=CHECK-EXP10
+; RUN: opt -instcombine -S < %s -mtriple=arm-apple-watchos2.0 | FileCheck %s --check-prefix=CHECK-EXP10
; rdar://7251832
; NOTE: The readonly attribute on the pow call should be preserved
diff --git a/test/Transforms/InstCombine/pow-4.ll b/test/Transforms/InstCombine/pow-4.ll
new file mode 100644
index 000000000000..76ef4c5de923
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-4.ll
@@ -0,0 +1,120 @@
+; Test that the pow library call simplifier works correctly.
+
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; Function Attrs: nounwind readnone
+declare double @llvm.pow.f64(double, double)
+declare float @llvm.pow.f32(float, float)
+
+; pow(x, 4.0f)
+define float @test_simplify_4f(float %x) #0 {
+; CHECK-LABEL: @test_simplify_4f(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul float %x, %x
+; CHECK-NEXT: %2 = fmul float %1, %1
+; CHECK-NEXT: ret float %2
+ %1 = call float @llvm.pow.f32(float %x, float 4.000000e+00)
+ ret float %1
+}
+
+; pow(x, 3.0)
+define double @test_simplify_3(double %x) #0 {
+; CHECK-LABEL: @test_simplify_3(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %x
+; CHECK-NEXT: ret double %2
+ %1 = call double @llvm.pow.f64(double %x, double 3.000000e+00)
+ ret double %1
+}
+
+; pow(x, 4.0)
+define double @test_simplify_4(double %x) #0 {
+; CHECK-LABEL: @test_simplify_4(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %1
+; CHECK-NEXT: ret double %2
+ %1 = call double @llvm.pow.f64(double %x, double 4.000000e+00)
+ ret double %1
+}
+
+; pow(x, 15.0)
+define double @test_simplify_15(double %x) #0 {
+; CHECK-LABEL: @test_simplify_15(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %x
+; CHECK-NEXT: %3 = fmul double %2, %2
+; CHECK-NEXT: %4 = fmul double %3, %3
+; CHECK-NEXT: %5 = fmul double %2, %4
+; CHECK-NEXT: ret double %5
+ %1 = call double @llvm.pow.f64(double %x, double 1.500000e+01)
+ ret double %1
+}
+
+; pow(x, -7.0)
+define double @test_simplify_neg_7(double %x) #0 {
+; CHECK-LABEL: @test_simplify_neg_7(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %x
+; CHECK-NEXT: %3 = fmul double %1, %2
+; CHECK-NEXT: %4 = fmul double %1, %3
+; CHECK-NEXT: %5 = fdiv double 1.000000e+00, %4
+; CHECK-NEXT: ret double %5
+ %1 = call double @llvm.pow.f64(double %x, double -7.000000e+00)
+ ret double %1
+}
+
+; pow(x, -19.0)
+define double @test_simplify_neg_19(double %x) #0 {
+; CHECK-LABEL: @test_simplify_neg_19(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %1
+; CHECK-NEXT: %3 = fmul double %2, %2
+; CHECK-NEXT: %4 = fmul double %3, %3
+; CHECK-NEXT: %5 = fmul double %1, %4
+; CHECK-NEXT: %6 = fmul double %5, %x
+; CHECK-NEXT: %7 = fdiv double 1.000000e+00, %6
+; CHECK-NEXT: ret double %7
+ %1 = call double @llvm.pow.f64(double %x, double -1.900000e+01)
+ ret double %1
+}
+
+; pow(x, 11.23)
+define double @test_simplify_11_23(double %x) #0 {
+; CHECK-LABEL: @test_simplify_11_23(
+; CHECK-NOT: fmul
+; CHECK-NEXT: %1 = call double @llvm.pow.f64(double %x, double 1.123000e+01)
+; CHECK-NEXT: ret double %1
+ %1 = call double @llvm.pow.f64(double %x, double 1.123000e+01)
+ ret double %1
+}
+
+; pow(x, 32.0)
+define double @test_simplify_32(double %x) #0 {
+; CHECK-LABEL: @test_simplify_32(
+; CHECK-NOT: pow
+; CHECK-NEXT: %1 = fmul double %x, %x
+; CHECK-NEXT: %2 = fmul double %1, %1
+; CHECK-NEXT: %3 = fmul double %2, %2
+; CHECK-NEXT: %4 = fmul double %3, %3
+; CHECK-NEXT: %5 = fmul double %4, %4
+; CHECK-NEXT: ret double %5
+ %1 = call double @llvm.pow.f64(double %x, double 3.200000e+01)
+ ret double %1
+}
+
+; pow(x, 33.0)
+define double @test_simplify_33(double %x) #0 {
+; CHECK-LABEL: @test_simplify_33(
+; CHECK-NOT: fmul
+; CHECK-NEXT: %1 = call double @llvm.pow.f64(double %x, double 3.300000e+01)
+; CHECK-NEXT: ret double %1
+ %1 = call double @llvm.pow.f64(double %x, double 3.300000e+01)
+ ret double %1
+}
+
+attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="true" "use-soft-float"="false" }
diff --git a/test/Transforms/InstCombine/pow-exp-nofastmath.ll b/test/Transforms/InstCombine/pow-exp-nofastmath.ll
new file mode 100644
index 000000000000..9e596fa3a723
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-exp-nofastmath.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mypow(double %x, double %y) #0 {
+entry:
+ %call = call double @exp(double %x)
+ %pow = call double @llvm.pow.f64(double %call, double %y)
+ ret double %pow
+}
+
+; CHECK-LABEL: define double @mypow(
+; CHECK: %call = call double @exp(double %x)
+; CHECK: %pow = call double @llvm.pow.f64(double %call, double %y)
+; CHECK: ret double %pow
+; CHECK: }
+
+declare double @exp(double) #1
+declare double @llvm.pow.f64(double, double)
diff --git a/test/Transforms/InstCombine/pow-exp.ll b/test/Transforms/InstCombine/pow-exp.ll
new file mode 100644
index 000000000000..acc512734ec5
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-exp.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mypow(double %x, double %y) #0 {
+entry:
+ %call = call double @exp(double %x)
+ %pow = call double @llvm.pow.f64(double %call, double %y)
+ ret double %pow
+}
+
+; CHECK-LABEL: define double @mypow(
+; CHECK: %mul = fmul fast double %x, %y
+; CHECK: %exp = call fast double @exp(double %mul) #0
+; CHECK: ret double %exp
+; CHECK: }
+
+define double @test2(double ()* %fptr, double %p1) #0 {
+ %call1 = call double %fptr()
+ %pow = call double @llvm.pow.f64(double %call1, double %p1)
+ ret double %pow
+}
+
+; CHECK-LABEL: @test2
+; CHECK: llvm.pow.f64
+
+declare double @exp(double) #1
+declare double @llvm.pow.f64(double, double)
+attributes #0 = { "unsafe-fp-math"="true" }
+attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/pow-exp2.ll b/test/Transforms/InstCombine/pow-exp2.ll
new file mode 100644
index 000000000000..c42cab391e64
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-exp2.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mypow(double %x, double %y) #0 {
+entry:
+ %call = call double @exp2(double %x)
+ %pow = call double @llvm.pow.f64(double %call, double %y)
+ ret double %pow
+}
+
+; CHECK-LABEL: define double @mypow(
+; CHECK: %mul = fmul fast double %x, %y
+; CHECK: %exp2 = call fast double @exp2(double %mul) #0
+; CHECK: ret double %exp2
+; CHECK: }
+
+declare double @exp2(double) #1
+declare double @llvm.pow.f64(double, double)
+attributes #0 = { "unsafe-fp-math"="true" }
+attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/pow-sqrt.ll b/test/Transforms/InstCombine/pow-sqrt.ll
new file mode 100644
index 000000000000..8fc74e4a0024
--- /dev/null
+++ b/test/Transforms/InstCombine/pow-sqrt.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @mypow(double %x) #0 {
+entry:
+ %pow = call double @llvm.pow.f64(double %x, double 5.000000e-01)
+ ret double %pow
+}
+
+; CHECK-LABEL: define double @mypow(
+; CHECK: %sqrt = call double @sqrt(double %x) #1
+; CHECK: ret double %sqrt
+; CHECK: }
+
+declare double @llvm.pow.f64(double, double)
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/pr20059.ll b/test/Transforms/InstCombine/pr20059.ll
deleted file mode 100644
index 0ef315936ff2..000000000000
--- a/test/Transforms/InstCombine/pr20059.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt -S -instcombine < %s | FileCheck %s
-
-; In PR20059 ( http://llvm.org/pr20059 ), shufflevector operations are reordered/removed
-; for an srem operation. This is not a valid optimization because it may cause a trap
-; on div-by-zero.
-
-; CHECK-LABEL: @do_not_reorder
-; CHECK: %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT: %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT: %retval = srem <4 x i32> %splat1, %splat2
-define <4 x i32> @do_not_reorder(<4 x i32> %p1, <4 x i32> %p2) {
- %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
- %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
- %retval = srem <4 x i32> %splat1, %splat2
- ret <4 x i32> %retval
-}
diff --git a/test/Transforms/InstCombine/pr24605.ll b/test/Transforms/InstCombine/pr24605.ll
new file mode 100644
index 000000000000..4b7b36137e6a
--- /dev/null
+++ b/test/Transforms/InstCombine/pr24605.ll
@@ -0,0 +1,15 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i1 @f(i8* %a, i8 %b) {
+; CHECK-LABEL: @f(
+entry:
+ %or = or i8 %b, -117
+ %sub = add i8 %or, -1
+ store i8 %sub, i8* %a, align 1
+ %cmp = icmp ugt i8 %or, %sub
+ ret i1 %cmp
+; CHECK: ret i1 true
+}
diff --git a/test/Transforms/InstCombine/pr25745.ll b/test/Transforms/InstCombine/pr25745.ll
new file mode 100644
index 000000000000..3bf9efc92b90
--- /dev/null
+++ b/test/Transforms/InstCombine/pr25745.ll
@@ -0,0 +1,20 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; Checking for a crash
+
+declare void @use.i1(i1 %val)
+declare void @use.i64(i64 %val)
+
+define i64 @f(i32 %x) {
+; CHECK-LABEL: @f(
+ entry:
+ %x.wide = sext i32 %x to i64
+ %minus.x = sub i32 0, %x
+ %minus.x.wide = sext i32 %minus.x to i64
+ %c = icmp slt i32 %x, 0
+ %val = select i1 %c, i64 %x.wide, i64 %minus.x.wide
+ call void @use.i1(i1 %c)
+ call void @use.i64(i64 %x.wide)
+ ret i64 %val
+; CHECK: ret i64 %val
+}
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 79c2ae28105e..0b5b5deb68c5 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -575,7 +575,7 @@ entry:
; CHECK: %0 = shl i8 %tmp4, 2
; CHECK: %tmp54 = and i8 %0, 16
%tmp55 = xor i8 %tmp54, %tmp51
-; CHECK: ret i8 %tmp55.1
+; CHECK: ret i8 %tmp551
ret i8 %tmp55
}
@@ -743,7 +743,7 @@ define i32 @test57(i32 %x) {
%or = or i32 %shl, 7
ret i32 %or
; CHECK-LABEL: @test57(
-; CHECK: %shl = shl i32 %shr.1, 4
+; CHECK: %shl = shl i32 %shr1, 4
}
diff --git a/test/Transforms/InstCombine/sincospi.ll b/test/Transforms/InstCombine/sincospi.ll
index f49fb35cb76a..10342c500961 100644
--- a/test/Transforms/InstCombine/sincospi.ll
+++ b/test/Transforms/InstCombine/sincospi.ll
@@ -90,3 +90,12 @@ define double @test_constant_f64() {
; CHECK-NO-SINCOS: call double @__sinpi
; CHECK-NO-SINCOS: call double @__cospi
}
+
+define double @test_fptr(double (double)* %fptr, double %p1) {
+ %sin = call double @__sinpi(double %p1) #0
+ %cos = call double %fptr(double %p1)
+ %res = fadd double %sin, %cos
+ ret double %res
+; CHECK-LABEL: @test_fptr
+; CHECK: __sinpi
+}
diff --git a/test/Transforms/InstCombine/sqrt-nofast.ll b/test/Transforms/InstCombine/sqrt-nofast.ll
new file mode 100644
index 000000000000..0d1dfc1542a5
--- /dev/null
+++ b/test/Transforms/InstCombine/sqrt-nofast.ll
@@ -0,0 +1,25 @@
+; Check that we skip transformations if the attribute unsafe-fp-math
+; is not set.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define float @mysqrt(float %x, float %y) #0 {
+entry:
+ %x.addr = alloca float, align 4
+ %y.addr = alloca float, align 4
+ store float %x, float* %x.addr, align 4
+ store float %y, float* %y.addr, align 4
+ %0 = load float, float* %x.addr, align 4
+ %1 = load float, float* %x.addr, align 4
+ %mul = fmul fast float %0, %1
+ %2 = call float @llvm.sqrt.f32(float %mul)
+ ret float %2
+}
+
+declare float @llvm.sqrt.f32(float) #1
+
+; CHECK: define float @mysqrt(float %x, float %y) {
+; CHECK: entry:
+; CHECK: %mul = fmul fast float %x, %x
+; CHECK: %0 = call float @llvm.sqrt.f32(float %mul)
+; CHECK: ret float %0
+; CHECK: }
diff --git a/test/Transforms/InstCombine/statepoint.ll b/test/Transforms/InstCombine/statepoint.ll
index f904f207bfdc..54fb6a7756ff 100644
--- a/test/Transforms/InstCombine/statepoint.ll
+++ b/test/Transforms/InstCombine/statepoint.ll
@@ -7,8 +7,8 @@ declare void @func()
define i1 @test_negative(i32 addrspace(1)* %p) gc "statepoint-example" {
entry:
- %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
- %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
+ %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
%cmp = icmp eq i32 addrspace(1)* %pnew, null
ret i1 %cmp
; CHECK-LABEL: test_negative
@@ -18,8 +18,8 @@ entry:
define i1 @test_nonnull(i32 addrspace(1)* nonnull %p) gc "statepoint-example" {
entry:
- %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
- %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
+ %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
%cmp = icmp eq i32 addrspace(1)* %pnew, null
ret i1 %cmp
; CHECK-LABEL: test_nonnull
@@ -28,8 +28,8 @@ entry:
define i1 @test_null() gc "statepoint-example" {
entry:
- %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* null)
- %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* null)
+ %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
%cmp = icmp eq i32 addrspace(1)* %pnew, null
ret i1 %cmp
; CHECK-LABEL: test_null
@@ -39,8 +39,8 @@ entry:
define i1 @test_undef() gc "statepoint-example" {
entry:
- %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* undef)
- %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* undef)
+ %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
%cmp = icmp eq i32 addrspace(1)* %pnew, null
ret i1 %cmp
; CHECK-LABEL: test_undef
@@ -48,5 +48,5 @@ entry:
; CHECK: ret i1 undef
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) #3
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll
index 5dfbd7140901..b8730413f1b5 100644
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -113,6 +113,119 @@ for.end: ; preds = %for.cond
; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
}
+define void @dse1(i32* %p) {
+; CHECK-LABEL: dse1
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 0, i32* %p
+ store i32 0, i32* %p
+ ret void
+}
+
+; Slightly subtle: if we're mixing atomic and non-atomic access to the
+; same location, then the contents of the location are undefined if there's
+; an actual race. As such, we're free to pick either store under the
+; assumption that we're not racing with any other thread.
+define void @dse2(i32* %p) {
+; CHECK-LABEL: dse2
+; CHECK-NEXT: store i32 0, i32* %p
+; CHECK-NEXT: ret
+ store atomic i32 0, i32* %p unordered, align 4
+ store i32 0, i32* %p
+ ret void
+}
+
+define void @dse3(i32* %p) {
+; CHECK-LABEL: dse3
+; CHECK-NEXT: store atomic i32 0, i32* %p unordered, align 4
+; CHECK-NEXT: ret
+ store i32 0, i32* %p
+ store atomic i32 0, i32* %p unordered, align 4
+ ret void
+}
+
+define void @dse4(i32* %p) {
+; CHECK-LABEL: dse4
+; CHECK-NEXT: store atomic i32 0, i32* %p unordered, align 4
+; CHECK-NEXT: ret
+ store atomic i32 0, i32* %p unordered, align 4
+ store atomic i32 0, i32* %p unordered, align 4
+ ret void
+}
+
+; Implementation limit - could remove unordered store here, but
+; currently don't.
+define void @dse5(i32* %p) {
+; CHECK-LABEL: dse5
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store atomic i32 0, i32* %p unordered, align 4
+ store atomic i32 0, i32* %p seq_cst, align 4
+ ret void
+}
+
+define void @write_back1(i32* %p) {
+; CHECK-LABEL: write_back1
+; CHECK-NEXT: ret
+ %v = load i32, i32* %p
+ store i32 %v, i32* %p
+ ret void
+}
+
+define void @write_back2(i32* %p) {
+; CHECK-LABEL: write_back2
+; CHECK-NEXT: ret
+ %v = load atomic i32, i32* %p unordered, align 4
+ store i32 %v, i32* %p
+ ret void
+}
+
+define void @write_back3(i32* %p) {
+; CHECK-LABEL: write_back3
+; CHECK-NEXT: ret
+ %v = load i32, i32* %p
+ store atomic i32 %v, i32* %p unordered, align 4
+ ret void
+}
+
+define void @write_back4(i32* %p) {
+; CHECK-LABEL: write_back4
+; CHECK-NEXT: ret
+ %v = load atomic i32, i32* %p unordered, align 4
+ store atomic i32 %v, i32* %p unordered, align 4
+ ret void
+}
+
+; Can't remove store due to ordering side effect
+define void @write_back5(i32* %p) {
+; CHECK-LABEL: write_back5
+; CHECK-NEXT: load
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ %v = load atomic i32, i32* %p unordered, align 4
+ store atomic i32 %v, i32* %p seq_cst, align 4
+ ret void
+}
+
+define void @write_back6(i32* %p) {
+; CHECK-LABEL: write_back6
+; CHECK-NEXT: load
+; CHECK-NEXT: ret
+ %v = load atomic i32, i32* %p seq_cst, align 4
+ store atomic i32 %v, i32* %p unordered, align 4
+ ret void
+}
+
+define void @write_back7(i32* %p) {
+; CHECK-LABEL: write_back7
+; CHECK-NEXT: load
+; CHECK-NEXT: ret
+ %v = load atomic volatile i32, i32* %p seq_cst, align 4
+ store atomic i32 %v, i32* %p unordered, align 4
+ ret void
+}
+
!0 = !{!4, !4, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/Transforms/InstCombine/strto-1.ll b/test/Transforms/InstCombine/strto-1.ll
index fc35dddcae5a..96f36e8d89c7 100644
--- a/test/Transforms/InstCombine/strto-1.ll
+++ b/test/Transforms/InstCombine/strto-1.ll
@@ -1,6 +1,6 @@
; Test that the strto* library call simplifiers works correctly.
;
-; RUN: opt < %s -instcombine -functionattrs -S | FileCheck %s
+; RUN: opt < %s -instcombine -inferattrs -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/Transforms/InstCombine/tan-nofastmath.ll b/test/Transforms/InstCombine/tan-nofastmath.ll
new file mode 100644
index 000000000000..0fe7b2c1d522
--- /dev/null
+++ b/test/Transforms/InstCombine/tan-nofastmath.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define float @mytan(float %x) {
+entry:
+ %call = call float @atanf(float %x)
+ %call1 = call float @tanf(float %call)
+ ret float %call1
+}
+
+; CHECK-LABEL: define float @mytan(
+; CHECK: %call = call float @atanf(float %x)
+; CHECK-NEXT: %call1 = call float @tanf(float %call)
+; CHECK-NEXT: ret float %call1
+; CHECK-NEXT: }
+
+declare float @tanf(float)
+declare float @atanf(float)
diff --git a/test/Transforms/InstCombine/tan.ll b/test/Transforms/InstCombine/tan.ll
new file mode 100644
index 000000000000..15a832f253a9
--- /dev/null
+++ b/test/Transforms/InstCombine/tan.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define float @mytan(float %x) #0 {
+entry:
+ %call = call float @atanf(float %x)
+ %call1 = call float @tanf(float %call)
+ ret float %call1
+}
+
+; CHECK-LABEL: define float @mytan(
+; CHECK: ret float %x
+
+define float @test2(float ()* %fptr) #0 {
+ %call1 = call float %fptr()
+ %tan = call float @tanf(float %call1)
+ ret float %tan
+}
+
+; CHECK-LABEL: @test2
+; CHECK: tanf
+
+declare float @tanf(float) #0
+declare float @atanf(float) #0
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/token.ll b/test/Transforms/InstCombine/token.ll
new file mode 100644
index 000000000000..0929cf7ebee1
--- /dev/null
+++ b/test/Transforms/InstCombine/token.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+declare i32 @__CxxFrameHandler3(...)
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+bb:
+ unreachable
+
+unreachable:
+ %cl = cleanuppad within none []
+ cleanupret from %cl unwind to caller
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: unreachable:
+; CHECK: %cl = cleanuppad within none []
+; CHECK: cleanupret from %cl unwind to caller
+
+define void @test2(i8 %A, i8 %B) personality i32 (...)* @__CxxFrameHandler3 {
+bb:
+ %X = zext i8 %A to i32
+ invoke void @g(i32 0)
+ to label %cont
+ unwind label %catch
+
+cont:
+ %Y = zext i8 %B to i32
+ invoke void @g(i32 0)
+ to label %unreachable
+ unwind label %catch
+
+catch:
+ %phi = phi i32 [ %X, %bb ], [ %Y, %cont ]
+ %cs = catchswitch within none [label %doit] unwind to caller
+
+doit:
+ %cl = catchpad within %cs []
+ call void @g(i32 %phi)
+ unreachable
+
+unreachable:
+ unreachable
+}
+
+; CHECK-LABEL: define void @test2(
+; CHECK: %X = zext i8 %A to i32
+; CHECK: %Y = zext i8 %B to i32
+; CHECK: %phi = phi i32 [ %X, %bb ], [ %Y, %cont ]
+
+define void @test3(i8 %A, i8 %B) personality i32 (...)* @__CxxFrameHandler3 {
+bb:
+ %X = zext i8 %A to i32
+ invoke void @g(i32 0)
+ to label %cont
+ unwind label %catch
+
+cont:
+ %Y = zext i8 %B to i32
+ invoke void @g(i32 0)
+ to label %cont2
+ unwind label %catch
+
+cont2:
+ invoke void @g(i32 0)
+ to label %unreachable
+ unwind label %catch
+
+catch:
+ %phi = phi i32 [ %X, %bb ], [ %Y, %cont ], [ %Y, %cont2 ]
+ %cs = catchswitch within none [label %doit] unwind to caller
+
+doit:
+ %cl = catchpad within %cs []
+ call void @g(i32 %phi)
+ unreachable
+
+unreachable:
+ unreachable
+}
+
+; CHECK-LABEL: define void @test3(
+; CHECK: %X = zext i8 %A to i32
+; CHECK: %Y = zext i8 %B to i32
+; CHECK: %phi = phi i32 [ %X, %bb ], [ %Y, %cont ], [ %Y, %cont2 ]
+
+
+declare void @g(i32)
diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll
index ee81cf8c3c5d..38f6b2804d63 100644
--- a/test/Transforms/InstCombine/trunc.ll
+++ b/test/Transforms/InstCombine/trunc.ll
@@ -118,3 +118,45 @@ define i8 @test10(i32 %X) {
; CHECK: and
; CHECK: ret
}
+
+; PR25543
+; https://llvm.org/bugs/show_bug.cgi?id=25543
+; This is an extractelement.
+
+define i32 @trunc_bitcast1(<4 x i32> %v) {
+ %bc = bitcast <4 x i32> %v to i128
+ %shr = lshr i128 %bc, 32
+ %ext = trunc i128 %shr to i32
+ ret i32 %ext
+
+; CHECK-LABEL: @trunc_bitcast1(
+; CHECK-NEXT: %ext = extractelement <4 x i32> %v, i32 1
+; CHECK-NEXT: ret i32 %ext
+}
+
+; A bitcast may still be required.
+
+define i32 @trunc_bitcast2(<2 x i64> %v) {
+ %bc = bitcast <2 x i64> %v to i128
+ %shr = lshr i128 %bc, 64
+ %ext = trunc i128 %shr to i32
+ ret i32 %ext
+
+; CHECK-LABEL: @trunc_bitcast2(
+; CHECK-NEXT: %bc1 = bitcast <2 x i64> %v to <4 x i32>
+; CHECK-NEXT: %ext = extractelement <4 x i32> %bc1, i32 2
+; CHECK-NEXT: ret i32 %ext
+}
+
+; The right shift is optional.
+
+define i32 @trunc_bitcast3(<4 x i32> %v) {
+ %bc = bitcast <4 x i32> %v to i128
+ %ext = trunc i128 %bc to i32
+ ret i32 %ext
+
+; CHECK-LABEL: @trunc_bitcast3(
+; CHECK-NEXT: %ext = extractelement <4 x i32> %v, i32 0
+; CHECK-NEXT: ret i32 %ext
+}
+
diff --git a/test/Transforms/InstCombine/unpack-fca.ll b/test/Transforms/InstCombine/unpack-fca.ll
index 48bb157956aa..9b8d10457491 100644
--- a/test/Transforms/InstCombine/unpack-fca.ll
+++ b/test/Transforms/InstCombine/unpack-fca.ll
@@ -5,110 +5,134 @@ target triple = "x86_64-unknown-linux-gnu"
%A__vtbl = type { i8*, i32 (%A*)* }
%A = type { %A__vtbl* }
+%B = type { i8*, i64 }
@A__vtblZ = constant %A__vtbl { i8* null, i32 (%A*)* @A.foo }
declare i32 @A.foo(%A* nocapture %this)
-declare i8* @allocmemory(i64)
-
-define void @storeA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to %A*
+define void @storeA(%A* %a.ptr) {
; CHECK-LABEL: storeA
-; CHECK: store %A__vtbl* @A__vtblZ
- store %A { %A__vtbl* @A__vtblZ }, %A* %1, align 8
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds %A, %A* %a.ptr, i64 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+ store %A { %A__vtbl* @A__vtblZ }, %A* %a.ptr, align 8
+ ret void
+}
+
+define void @storeB(%B* %b.ptr) {
+; CHECK-LABEL: storeB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
+; CHECK-NEXT: store i8* null, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
+; CHECK-NEXT: store i64 42, i64* [[GEP2]], align 8
+; CHECK-NEXT: ret void
+ store %B { i8* null, i64 42 }, %B* %b.ptr, align 8
ret void
}
-define void @storeStructOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to { %A }*
+define void @storeStructOfA({ %A }* %sa.ptr) {
; CHECK-LABEL: storeStructOfA
-; CHECK: store %A__vtbl* @A__vtblZ
- store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %1, align 8
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+ store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %sa.ptr, align 8
ret void
}
-define void @storeArrayOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to [1 x %A]*
+define void @storeArrayOfA([1 x %A]* %aa.ptr) {
; CHECK-LABEL: storeArrayOfA
-; CHECK: store %A__vtbl* @A__vtblZ
- store [1 x %A] [%A { %A__vtbl* @A__vtblZ }], [1 x %A]* %1, align 8
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds [1 x %A], [1 x %A]* %aa.ptr, i64 0, i64 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+ store [1 x %A] [%A { %A__vtbl* @A__vtblZ }], [1 x %A]* %aa.ptr, align 8
ret void
}
-define void @storeStructOfArrayOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to { [1 x %A] }*
+define void @storeStructOfArrayOfA({ [1 x %A] }* %saa.ptr) {
; CHECK-LABEL: storeStructOfArrayOfA
-; CHECK: store %A__vtbl* @A__vtblZ
- store { [1 x %A] } { [1 x %A] [%A { %A__vtbl* @A__vtblZ }] }, { [1 x %A] }* %1, align 8
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret void
+ store { [1 x %A] } { [1 x %A] [%A { %A__vtbl* @A__vtblZ }] }, { [1 x %A] }* %saa.ptr, align 8
ret void
}
-define %A @loadA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to %A*
+define %A @loadA(%A* %a.ptr) {
; CHECK-LABEL: loadA
-; CHECK: load %A__vtbl*,
-; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
- %2 = load %A, %A* %1, align 8
- ret %A %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds %A, %A* %a.ptr, i64 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: ret %A [[IV]]
+ %1 = load %A, %A* %a.ptr, align 8
+ ret %A %1
}
-define { %A } @loadStructOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to { %A }*
+define %B @loadB(%B* %b.ptr) {
+; CHECK-LABEL: loadB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
+; CHECK-NEXT: [[LOAD1:%[a-z0-9\.]+]] = load i8*, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %B undef, i8* [[LOAD1]], 0
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
+; CHECK-NEXT: [[LOAD2:%[a-z0-9\.]+]] = load i64, i64* [[GEP2]], align 8
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue %B [[IV1]], i64 [[LOAD2]], 1
+; CHECK-NEXT: ret %B [[IV2]]
+ %1 = load %B, %B* %b.ptr, align 8
+ ret %B %1
+}
+
+define { %A } @loadStructOfA({ %A }* %sa.ptr) {
; CHECK-LABEL: loadStructOfA
-; CHECK: load %A__vtbl*,
-; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
-; CHECK: insertvalue { %A } undef, %A {{.*}}, 0
- %2 = load { %A }, { %A }* %1, align 8
- ret { %A } %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue { %A } undef, %A [[IV1]], 0
+; CHECK-NEXT: ret { %A } [[IV2]]
+ %1 = load { %A }, { %A }* %sa.ptr, align 8
+ ret { %A } %1
}
-define [1 x %A] @loadArrayOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to [1 x %A]*
+define [1 x %A] @loadArrayOfA([1 x %A]* %aa.ptr) {
; CHECK-LABEL: loadArrayOfA
-; CHECK: load %A__vtbl*,
-; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
-; CHECK: insertvalue [1 x %A] undef, %A {{.*}}, 0
- %2 = load [1 x %A], [1 x %A]* %1, align 8
- ret [1 x %A] %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds [1 x %A], [1 x %A]* %aa.ptr, i64 0, i64 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue [1 x %A] undef, %A [[IV1]], 0
+; CHECK-NEXT: ret [1 x %A] [[IV2]]
+ %1 = load [1 x %A], [1 x %A]* %aa.ptr, align 8
+ ret [1 x %A] %1
}
-define { [1 x %A] } @loadStructOfArrayOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to { [1 x %A] }*
+define { [1 x %A] } @loadStructOfArrayOfA({ [1 x %A] }* %saa.ptr) {
; CHECK-LABEL: loadStructOfArrayOfA
-; CHECK: load %A__vtbl*,
-; CHECK: insertvalue %A undef, %A__vtbl* {{.*}}, 0
-; CHECK: insertvalue [1 x %A] undef, %A {{.*}}, 0
-; CHECK: insertvalue { [1 x %A] } undef, [1 x %A] {{.*}}, 0
- %2 = load { [1 x %A] }, { [1 x %A] }* %1, align 8
- ret { [1 x %A] } %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { [1 x %A] }, { [1 x %A] }* %saa.ptr, i64 0, i32 0, i64 0, i32 0
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %A__vtbl*, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %A undef, %A__vtbl* [[LOAD]], 0
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue [1 x %A] undef, %A [[IV1]], 0
+; CHECK-NEXT: [[IV3:%[a-z0-9\.]+]] = insertvalue { [1 x %A] } undef, [1 x %A] [[IV2]], 0
+; CHECK-NEXT: ret { [1 x %A] } [[IV3]]
+ %1 = load { [1 x %A] }, { [1 x %A] }* %saa.ptr, align 8
+ ret { [1 x %A] } %1
}
-define { %A } @structOfA() {
-body:
- %0 = tail call i8* @allocmemory(i64 32)
- %1 = bitcast i8* %0 to { %A }*
+define { %A } @structOfA({ %A }* %sa.ptr) {
; CHECK-LABEL: structOfA
-; CHECK: store %A__vtbl* @A__vtblZ
- store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %1, align 8
- %2 = load { %A }, { %A }* %1, align 8
-; CHECK-NOT: load
-; CHECK: ret { %A } { %A { %A__vtbl* @A__vtblZ } }
- ret { %A } %2
+; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds { %A }, { %A }* %sa.ptr, i64 0, i32 0, i32 0
+; CHECK-NEXT: store %A__vtbl* @A__vtblZ, %A__vtbl** [[GEP]], align 8
+; CHECK-NEXT: ret { %A } { %A { %A__vtbl* @A__vtblZ } }
+ store { %A } { %A { %A__vtbl* @A__vtblZ } }, { %A }* %sa.ptr, align 8
+ %1 = load { %A }, { %A }* %sa.ptr, align 8
+ ret { %A } %1
+}
+
+define %B @structB(%B* %b.ptr) {
+; CHECK-LABEL: structB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 0
+; CHECK-NEXT: store i8* null, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds %B, %B* %b.ptr, i64 0, i32 1
+; CHECK-NEXT: store i64 42, i64* [[GEP2]], align 8
+; CHECK-NEXT: ret %B { i8* null, i64 42 }
+ store %B { i8* null, i64 42 }, %B* %b.ptr, align 8
+ %1 = load %B, %B* %b.ptr, align 8
+ ret %B %1
}
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 4245c7a3c134..0b9663300c39 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -138,22 +138,6 @@ declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)
-; <rdar://problem/6945110>
-define <4 x i32> @kernel3_vertical(<4 x i16> * %src, <8 x i16> * %foo) nounwind {
-entry:
- %tmp = load <4 x i16>, <4 x i16>* %src
- %tmp1 = load <8 x i16>, <8 x i16>* %foo
-; CHECK: %tmp2 = shufflevector
- %tmp2 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; pmovzxwd ignores the upper 64-bits of its input; -instcombine should remove this shuffle:
-; CHECK-NOT: shufflevector
- %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: pmovzxwd
- %0 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
- ret <4 x i32> %0
-}
-declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
-
define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
entry:
; CHECK-LABEL: define <4 x float> @dead_shuffle_elt(
@@ -210,130 +194,6 @@ define <4 x float> @test_select(float %f, float %g) {
ret <4 x float> %ret
}
-; We should optimize these two redundant insertqi into one
-; CHECK: define <2 x i64> @testInsertTwice(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertTwice(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
-; CHECK-NOT: insertqi
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 32)
- ret <2 x i64> %2
-}
-
-; The result of this insert is the second arg, since the top 64 bits of
-; the result are undefined, and we copy the bottom 64 bits from the
-; second arg
-; CHECK: define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: ret <2 x i64> %i
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
- ret <2 x i64> %1
-}
-
-; Test the several types of ranges and ordering that exist for two insertqi
-; CHECK: define <2 x i64> @testInsertContainedRange(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertContainedRange(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
-; CHECK: ret <2 x i64> %[[RES]]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 16)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertContainedRange_2(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertContainedRange_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
-; CHECK: ret <2 x i64> %[[RES]]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 16)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertOverlappingRange(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertOverlappingRange(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
-; CHECK: ret <2 x i64> %[[RES]]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 16)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertOverlappingRange_2(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertOverlappingRange_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
-; CHECK: ret <2 x i64> %[[RES]]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 16)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertAdjacentRange(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertAdjacentRange(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
-; CHECK: ret <2 x i64> %[[RES]]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertAdjacentRange_2(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertAdjacentRange_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
-; CHECK: ret <2 x i64> %[[RES]]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertDisjointRange(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertDisjointRange(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
-; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
-; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
- %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
- ret <2 x i64> %2
-}
-
-; CHECK: define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: ret <2 x i64> %i
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
- ret <2 x i64> %1
-}
-
-; CHECK: define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: ret <2 x i64> undef
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
- ret <2 x i64> %1
-}
-
-; CHECK: define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: ret <2 x i64> undef
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
- ret <2 x i64> %1
-}
-
-; CHECK: define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i)
-define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
-; CHECK: ret <2 x i64> undef
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
- ret <2 x i64> %1
-}
-
-; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
-declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
-
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
; CHECK-LABEL: @test_vpermilvar_ps(
@@ -394,212 +254,15 @@ define <4 x double> @test_vpermilvar_pd_256_zero(<4 x double> %v) {
ret <4 x double> %a
}
-define <2 x i64> @test_sse2_1() nounwind readnone uwtable {
- %S = bitcast i32 1 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
- %6 = bitcast <8 x i16> %5 to <4 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
- %9 = bitcast <4 x i32> %8 to <2 x i64>
- %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
- %11 = bitcast <2 x i64> %10 to <8 x i16>
- %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
- %13 = bitcast <8 x i16> %12 to <4 x i32>
- %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
- %15 = bitcast <4 x i32> %14 to <2 x i64>
- %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
- ret <2 x i64> %16
-; CHECK: test_sse2_1
-; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
-}
-
-define <4 x i64> @test_avx2_1() nounwind readnone uwtable {
- %S = bitcast i32 1 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
- %6 = bitcast <16 x i16> %5 to <8 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
- %9 = bitcast <8 x i32> %8 to <4 x i64>
- %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
- %11 = bitcast <4 x i64> %10 to <16 x i16>
- %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
- %13 = bitcast <16 x i16> %12 to <8 x i32>
- %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
- %15 = bitcast <8 x i32> %14 to <4 x i64>
- %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
- ret <4 x i64> %16
-; CHECK: test_avx2_1
-; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
-}
-
-define <2 x i64> @test_sse2_0() nounwind readnone uwtable {
- %S = bitcast i32 128 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
- %6 = bitcast <8 x i16> %5 to <4 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
- %9 = bitcast <4 x i32> %8 to <2 x i64>
- %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
- %11 = bitcast <2 x i64> %10 to <8 x i16>
- %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
- %13 = bitcast <8 x i16> %12 to <4 x i32>
- %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
- %15 = bitcast <4 x i32> %14 to <2 x i64>
- %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
- ret <2 x i64> %16
-; CHECK: test_sse2_0
-; CHECK: ret <2 x i64> zeroinitializer
-}
-
-define <4 x i64> @test_avx2_0() nounwind readnone uwtable {
- %S = bitcast i32 128 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
- %6 = bitcast <16 x i16> %5 to <8 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
- %9 = bitcast <8 x i32> %8 to <4 x i64>
- %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
- %11 = bitcast <4 x i64> %10 to <16 x i16>
- %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
- %13 = bitcast <16 x i16> %12 to <8 x i32>
- %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
- %15 = bitcast <8 x i32> %14 to <4 x i64>
- %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
- ret <4 x i64> %16
-; CHECK: test_avx2_0
-; CHECK: ret <4 x i64> zeroinitializer
-}
-define <2 x i64> @test_sse2_psrl_1() nounwind readnone uwtable {
- %S = bitcast i32 1 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
- %6 = bitcast <8 x i16> %5 to <4 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
- %9 = bitcast <4 x i32> %8 to <2 x i64>
- %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
- %11 = bitcast <2 x i64> %10 to <8 x i16>
- %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
- %13 = bitcast <8 x i16> %12 to <4 x i32>
- %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
- %15 = bitcast <4 x i32> %14 to <2 x i64>
- %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
- ret <2 x i64> %16
-; CHECK: test_sse2_psrl_1
-; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
-}
-
-define <4 x i64> @test_avx2_psrl_1() nounwind readnone uwtable {
- %S = bitcast i32 1 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
- %6 = bitcast <16 x i16> %5 to <8 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
- %9 = bitcast <8 x i32> %8 to <4 x i64>
- %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
- %11 = bitcast <4 x i64> %10 to <16 x i16>
- %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
- %13 = bitcast <16 x i16> %12 to <8 x i32>
- %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
- %15 = bitcast <8 x i32> %14 to <4 x i64>
- %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
- ret <4 x i64> %16
-; CHECK: test_avx2_psrl_1
-; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
-}
-
-define <2 x i64> @test_sse2_psrl_0() nounwind readnone uwtable {
- %S = bitcast i32 128 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
- %6 = bitcast <8 x i16> %5 to <4 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
- %9 = bitcast <4 x i32> %8 to <2 x i64>
- %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
- %11 = bitcast <2 x i64> %10 to <8 x i16>
- %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
- %13 = bitcast <8 x i16> %12 to <4 x i32>
- %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
- %15 = bitcast <4 x i32> %14 to <2 x i64>
- %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
- ret <2 x i64> %16
-; CHECK: test_sse2_psrl_0
-; CHECK: ret <2 x i64> zeroinitializer
-}
-
-define <4 x i64> @test_avx2_psrl_0() nounwind readnone uwtable {
- %S = bitcast i32 128 to i32
- %1 = zext i32 %S to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- %3 = insertelement <2 x i64> %2, i64 0, i32 1
- %4 = bitcast <2 x i64> %3 to <8 x i16>
- %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
- %6 = bitcast <16 x i16> %5 to <8 x i32>
- %7 = bitcast <2 x i64> %3 to <4 x i32>
- %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
- %9 = bitcast <8 x i32> %8 to <4 x i64>
- %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
- %11 = bitcast <4 x i64> %10 to <16 x i16>
- %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
- %13 = bitcast <16 x i16> %12 to <8 x i32>
- %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
- %15 = bitcast <8 x i32> %14 to <4 x i64>
- %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
- ret <4 x i64> %16
-; CHECK: test_avx2_psrl_0
-; CHECK: ret <4 x i64> zeroinitializer
+define <2 x i64> @PR24922(<2 x i64> %v) {
+; CHECK-LABEL: @PR24922
+; CHECK: select <2 x i1>
+;
+; Check that instcombine doesn't wrongly fold the select instruction into a
+; ret <2 x i64> %v (only lane 1 of the mask is literally true; lane 0 is an unfolded icmp ConstantExpr).
+;
+; FIXME: We should be able to simplify the ConstantExpr in the select mask.
+entry:
+ %result = select <2 x i1> <i1 icmp eq (i64 extractelement (<2 x i64> bitcast (<4 x i32> <i32 15, i32 15, i32 15, i32 15> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer
+ ret <2 x i64> %result
}
-
-declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
-declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
-declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
-declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
-declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
-declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
-declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
-declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
-declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
-declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
-declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
-declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
-declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
-declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
-declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
-declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
-declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
-declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
-declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
-declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
-declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
-declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
-declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
-declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
-
-attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index d4d7f167ef07..d2cd2b90abc2 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -310,16 +310,16 @@ define <4 x i32> @shuffle_17addnuw(<4 x i32> %v1, <4 x i32> %v2) nounwind uwtabl
ret <4 x i32> %r
}
-define <4 x float> @shuffle_17fsub(<4 x float> %v1, <4 x float> %v2) nounwind uwtable {
-; CHECK-LABEL: @shuffle_17fsub(
-; CHECK-NOT: shufflevector
-; CHECK: fsub <4 x float> %v1, %v2
-; CHECK: shufflevector
+define <4 x float> @shuffle_17fsub_fast(<4 x float> %v1, <4 x float> %v2) nounwind uwtable {
+; CHECK-LABEL: @shuffle_17fsub_fast(
+; CHECK-NEXT: [[VAR1:%[a-zA-Z0-9.]+]] = fsub fast <4 x float> %v1, %v2
+; CHECK-NEXT: shufflevector <4 x float> [[VAR1]], <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT: ret <4 x float>
%t1 = shufflevector <4 x float> %v1, <4 x float> zeroinitializer,
<4 x i32> <i32 1, i32 2, i32 3, i32 0>
%t2 = shufflevector <4 x float> %v2, <4 x float> zeroinitializer,
<4 x i32> <i32 1, i32 2, i32 3, i32 0>
- %r = fsub <4 x float> %t1, %t2
+ %r = fsub fast <4 x float> %t1, %t2
ret <4 x float> %r
}
@@ -406,6 +406,21 @@ define i32 @pr19737(<4 x i32> %in0) {
ret i32 %rv
}
+; In PR20059 ( http://llvm.org/pr20059 ), the shufflevector operands of an srem operation
+; were reordered/removed. This is not a valid optimization: an srem on the unshuffled
+; vectors may trap on div-by-zero in lanes that the splats below never read.
+
+define <4 x i32> @pr20059(<4 x i32> %p1, <4 x i32> %p2) {
+; CHECK-LABEL: @pr20059(
+; CHECK-NEXT: %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: %retval = srem <4 x i32> %splat1, %splat2
+ %splat1 = shufflevector <4 x i32> %p1, <4 x i32> undef, <4 x i32> zeroinitializer
+ %splat2 = shufflevector <4 x i32> %p2, <4 x i32> undef, <4 x i32> zeroinitializer
+ %retval = srem <4 x i32> %splat1, %splat2
+ ret <4 x i32> %retval
+}
+
define <4 x i32> @pr20114(<4 x i32> %__mask) {
; CHECK-LABEL: @pr20114
; CHECK: shufflevector
diff --git a/test/Transforms/InstCombine/vector_gep2.ll b/test/Transforms/InstCombine/vector_gep2.ll
index d76a7d56cc7a..1b80ffd101c9 100644
--- a/test/Transforms/InstCombine/vector_gep2.ll
+++ b/test/Transforms/InstCombine/vector_gep2.ll
@@ -9,3 +9,26 @@ define <2 x i8*> @testa(<2 x i8*> %a) {
; CHECK: getelementptr i8, <2 x i8*> %a, <2 x i64> <i64 0, i64 1>
ret <2 x i8*> %g
}
+
+define <8 x double*> @vgep_s_v8i64(double* %a, <8 x i64>%i) {
+; CHECK-LABEL: @vgep_s_v8i64
+; CHECK: getelementptr double, double* %a, <8 x i64> %i
+ %VectorGep = getelementptr double, double* %a, <8 x i64> %i
+ ret <8 x double*> %VectorGep
+}
+
+define <8 x double*> @vgep_s_v8i32(double* %a, <8 x i32>%i) {
+; CHECK-LABEL: @vgep_s_v8i32
+; CHECK: %1 = sext <8 x i32> %i to <8 x i64>
+; CHECK: getelementptr double, double* %a, <8 x i64> %1
+ %VectorGep = getelementptr double, double* %a, <8 x i32> %i
+ ret <8 x double*> %VectorGep
+}
+
+define <8 x i8*> @vgep_v8iPtr_i32(<8 x i8*> %a, i32 %i) {
+; CHECK-LABEL: @vgep_v8iPtr_i32
+; CHECK: %1 = sext i32 %i to i64
+; CHECK: %VectorGep = getelementptr i8, <8 x i8*> %a, i64 %1
+ %VectorGep = getelementptr i8, <8 x i8*> %a, i32 %i
+ ret <8 x i8*> %VectorGep
+}
diff --git a/test/Transforms/InstCombine/x86-f16c.ll b/test/Transforms/InstCombine/x86-f16c.ll
new file mode 100644
index 000000000000..e10b339907e3
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-f16c.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
+declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
+
+;
+; Vector Demanded Elements
+;
+
+; Only the bottom 4 elements of %A are required, so the shuffle feeding the call is removed.
+define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
+; CHECK-LABEL: @demand_vcvtph2ps_128
+; CHECK-NEXT: %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A)
+; CHECK-NEXT: ret <4 x float> %1
+ %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
+ ret <4 x float> %2
+}
+
+; All 8 elements are required, so the shuffle feeding the call must be kept.
+define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) {
+; CHECK-LABEL: @demand_vcvtph2ps_256
+; CHECK-NEXT: %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
+; CHECK-NEXT: ret <8 x float> %2
+ %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
+ ret <8 x float> %2
+}
+
+;
+; Constant Folding
+;
+
+define <4 x float> @fold_vcvtph2ps_128() {
+; CHECK-LABEL: @fold_vcvtph2ps_128
+; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00>
+ %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
+ ret <4 x float> %1
+}
+
+define <8 x float> @fold_vcvtph2ps_256() {
+; CHECK-LABEL: @fold_vcvtph2ps_256
+; CHECK-NEXT: ret <8 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00, float 2.000000e+00, float 6.550400e+04, float -1.000000e+00, float -2.000000e+00>
+ %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
+ ret <8 x float> %1
+}
+
+define <4 x float> @fold_vcvtph2ps_128_zero() {
+; CHECK-LABEL: @fold_vcvtph2ps_128_zero
+; CHECK-NEXT: ret <4 x float> zeroinitializer
+ %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ ret <4 x float> %1
+}
+
+define <8 x float> @fold_vcvtph2ps_256_zero() {
+; CHECK-LABEL: @fold_vcvtph2ps_256_zero
+; CHECK-NEXT: ret <8 x float> zeroinitializer
+ %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ ret <8 x float> %1
+}
diff --git a/test/Transforms/InstCombine/x86-pmovsx.ll b/test/Transforms/InstCombine/x86-pmovsx.ll
new file mode 100644
index 000000000000..31bdc59b16a8
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-pmovsx.ll
@@ -0,0 +1,136 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
+
+declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
+declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
+
+;
+; Basic sign extension tests
+;
+
+define <4 x i32> @sse41_pmovsxbd(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovsxbd
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: sext <4 x i8> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+ %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %v)
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @sse41_pmovsxbq(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovsxbq
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: sext <2 x i8> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %v)
+ ret <2 x i64> %res
+}
+
+define <8 x i16> @sse41_pmovsxbw(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovsxbw
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: sext <8 x i8> %1 to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> %2
+
+ %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %v)
+ ret <8 x i16> %res
+}
+
+define <2 x i64> @sse41_pmovsxdq(<4 x i32> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovsxdq
+; CHECK-NEXT: shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: sext <2 x i32> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %v)
+ ret <2 x i64> %res
+}
+
+define <4 x i32> @sse41_pmovsxwd(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovsxwd
+; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: sext <4 x i16> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+ %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %v)
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @sse41_pmovsxwq(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovsxwq
+; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: sext <2 x i16> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %v)
+ ret <2 x i64> %res
+}
+
+define <8 x i32> @avx2_pmovsxbd(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovsxbd
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: sext <8 x i8> %1 to <8 x i32>
+; CHECK-NEXT: ret <8 x i32> %2
+
+ %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %v)
+ ret <8 x i32> %res
+}
+
+define <4 x i64> @avx2_pmovsxbq(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovsxbq
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: sext <4 x i8> %1 to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> %2
+
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %v)
+ ret <4 x i64> %res
+}
+
+define <16 x i16> @avx2_pmovsxbw(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovsxbw
+; CHECK-NEXT: sext <16 x i8> %v to <16 x i16>
+; CHECK-NEXT: ret <16 x i16> %1
+
+ %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %v)
+ ret <16 x i16> %res
+}
+
+define <4 x i64> @avx2_pmovsxdq(<4 x i32> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovsxdq
+; CHECK-NEXT: sext <4 x i32> %v to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> %1
+
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %v)
+ ret <4 x i64> %res
+}
+
+define <8 x i32> @avx2_pmovsxwd(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovsxwd
+; CHECK-NEXT: sext <8 x i16> %v to <8 x i32>
+; CHECK-NEXT: ret <8 x i32> %1
+
+ %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %v)
+ ret <8 x i32> %res
+}
+
+define <4 x i64> @avx2_pmovsxwq(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovsxwq
+; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: sext <4 x i16> %1 to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> %2
+
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %v)
+ ret <4 x i64> %res
+}
diff --git a/test/Transforms/InstCombine/x86-pmovzx.ll b/test/Transforms/InstCombine/x86-pmovzx.ll
new file mode 100644
index 000000000000..31028cba26eb
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-pmovzx.ll
@@ -0,0 +1,136 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
+
+declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
+declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
+declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
+
+;
+; Basic zero extension tests
+;
+
+; sse41.pmovzxbd on <16 x i8>: expects a shufflevector picking elements 0-3
+; of %v, then a zext of that <4 x i8> to <4 x i32>.
+define <4 x i32> @sse41_pmovzxbd(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovzxbd
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: zext <4 x i8> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+ %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %v)
+ ret <4 x i32> %res
+}
+
+; sse41.pmovzxbq on <16 x i8>: expects a shufflevector picking elements 0-1
+; of %v, then a zext of that <2 x i8> to <2 x i64>.
+define <2 x i64> @sse41_pmovzxbq(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovzxbq
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: zext <2 x i8> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %v)
+ ret <2 x i64> %res
+}
+
+; sse41.pmovzxbw on <16 x i8>: expects a shufflevector picking elements 0-7
+; of %v, then a zext of that <8 x i8> to <8 x i16>.
+define <8 x i16> @sse41_pmovzxbw(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovzxbw
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: zext <8 x i8> %1 to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> %2
+
+ %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %v)
+ ret <8 x i16> %res
+}
+
+; sse41.pmovzxdq on <4 x i32>: expects a shufflevector picking elements 0-1
+; of %v, then a zext of that <2 x i32> to <2 x i64>.
+define <2 x i64> @sse41_pmovzxdq(<4 x i32> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovzxdq
+; CHECK-NEXT: shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: zext <2 x i32> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %v)
+ ret <2 x i64> %res
+}
+
+; sse41.pmovzxwd on <8 x i16>: expects a shufflevector picking elements 0-3
+; of %v, then a zext of that <4 x i16> to <4 x i32>.
+define <4 x i32> @sse41_pmovzxwd(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovzxwd
+; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: zext <4 x i16> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+
+ %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %v)
+ ret <4 x i32> %res
+}
+
+; sse41.pmovzxwq on <8 x i16>: expects a shufflevector picking elements 0-1
+; of %v, then a zext of that <2 x i16> to <2 x i64>.
+define <2 x i64> @sse41_pmovzxwq(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @sse41_pmovzxwq
+; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: zext <2 x i16> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %v)
+ ret <2 x i64> %res
+}
+
+; avx2.pmovzxbd on <16 x i8>: expects a shufflevector picking elements 0-7
+; of %v, then a zext of that <8 x i8> to <8 x i32>.
+define <8 x i32> @avx2_pmovzxbd(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovzxbd
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: zext <8 x i8> %1 to <8 x i32>
+; CHECK-NEXT: ret <8 x i32> %2
+
+ %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %v)
+ ret <8 x i32> %res
+}
+
+; avx2.pmovzxbq on <16 x i8>: expects a shufflevector picking elements 0-3
+; of %v, then a zext of that <4 x i8> to <4 x i64>.
+define <4 x i64> @avx2_pmovzxbq(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovzxbq
+; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: zext <4 x i8> %1 to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> %2
+
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %v)
+ ret <4 x i64> %res
+}
+
+; avx2.pmovzxbw: source and result both have 16 elements, so the check lines
+; expect a bare zext of %v with no preceding shufflevector.
+define <16 x i16> @avx2_pmovzxbw(<16 x i8> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovzxbw
+; CHECK-NEXT: zext <16 x i8> %v to <16 x i16>
+; CHECK-NEXT: ret <16 x i16> %1
+
+ %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %v)
+ ret <16 x i16> %res
+}
+
+; avx2.pmovzxdq: all four i32 elements are used, so the check lines expect a
+; bare zext of %v to <4 x i64> with no preceding shufflevector.
+define <4 x i64> @avx2_pmovzxdq(<4 x i32> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovzxdq
+; CHECK-NEXT: zext <4 x i32> %v to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> %1
+
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %v)
+ ret <4 x i64> %res
+}
+
+; avx2.pmovzxwd: all eight i16 elements are used, so the check lines expect a
+; bare zext of %v to <8 x i32> with no preceding shufflevector.
+define <8 x i32> @avx2_pmovzxwd(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovzxwd
+; CHECK-NEXT: zext <8 x i16> %v to <8 x i32>
+; CHECK-NEXT: ret <8 x i32> %1
+
+ %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %v)
+ ret <8 x i32> %res
+}
+
+; avx2.pmovzxwq on <8 x i16>: expects a shufflevector picking elements 0-3
+; of %v, then a zext of that <4 x i16> to <4 x i64>.
+define <4 x i64> @avx2_pmovzxwq(<8 x i16> %v) nounwind readnone {
+; CHECK-LABEL: @avx2_pmovzxwq
+; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: zext <4 x i16> %1 to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> %2
+
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %v)
+ ret <4 x i64> %res
+}
diff --git a/test/Transforms/InstCombine/x86-pshufb.ll b/test/Transforms/InstCombine/x86-pshufb.ll
new file mode 100644
index 000000000000..caaaed8910a8
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-pshufb.ll
@@ -0,0 +1,267 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Verify that instcombine is able to fold identity shuffles.
+
+; The pshufb mask is <0, 1, ..., 15>, so every byte stays in place; the
+; function is expected to return %InVec directly.
+define <16 x i8> @identity_test(<16 x i8> %InVec) {
+; CHECK-LABEL: @identity_test
+; CHECK: ret <16 x i8> %InVec
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+; 256-bit variant: the mask is 0..15 written twice (once per 16-byte half);
+; the function is expected to return %InVec directly.
+define <32 x i8> @identity_test_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @identity_test_avx2
+; CHECK: ret <32 x i8> %InVec
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+
+; Verify that instcombine is able to fold byte shuffles with zero masks.
+
+; Every mask byte is -128; the expected result is the constant
+; zeroinitializer, with no dependence on %InVec.
+define <16 x i8> @fold_to_zero_vector(<16 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector
+; CHECK: ret <16 x i8> zeroinitializer
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @fold_to_zero_vector_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_avx2
+; CHECK: ret <32 x i8> zeroinitializer
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <32 x i8> %1
+}
+
+; Instcombine should be able to fold the following byte shuffle to a builtin shufflevector
+; with a shuffle mask of all zeroes, i.e. every result byte is element 0 of %InVec.
+
+define <16 x i8> @splat_test(<16 x i8> %InVec) {
+; CHECK-LABEL: @splat_test
+; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> zeroinitializer
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> zeroinitializer)
+ ret <16 x i8> %1
+}
+
+; In the test case below, elements in the low 128-bit lane of the result
+; vector are equal to the lowest byte of %InVec (shuffle index 0).
+; Elements in the high 128-bit lane of the result vector are equal to
+; the lowest byte in the high 128-bit lane of %InVec (shuffle index 16).
+; The pshufb mask operand itself is zeroinitializer.
+
+define <32 x i8> @splat_test_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @splat_test_avx2
+; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> zeroinitializer)
+ ret <32 x i8> %1
+}
+
+; Each of the byte shuffles in the following tests is equivalent to a blend between
+; vector %InVec and a vector of all zeroes.
+
+; Mask has -128 at even positions and the position's own index at odd ones.
+; The expected shuffle reads index 16 (element 0 of the second operand) at the
+; even positions and keeps the odd bytes of %InVec in place.
+define <16 x i8> @blend1(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend1
+; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
+ ret <16 x i8> %1
+}
+
+; In every group of four bytes the first two mask entries are -128 and the
+; last two keep their own index; the expected shuffle uses index 16 for the
+; -128 positions and the original index elsewhere.
+define <16 x i8> @blend2(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend2
+; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+; In each 8-byte half the first four mask entries are -128 and the last four
+; keep their own index; the expected shuffle uses index 16 for the -128
+; positions and the original index elsewhere.
+define <16 x i8> @blend3(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend3
+; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 12, i32 13, i32 14, i32 15>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+; Mask bytes 0-7 are -128 and bytes 8-15 keep their own index; the expected
+; shuffle uses index 16 for the low eight positions and 8..15 for the rest.
+define <16 x i8> @blend4(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend4
+; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+; Mask bytes 0-3 keep their own index and bytes 4-15 are -128; the expected
+; shuffle keeps indices 0..3 and uses index 16 for the remaining positions.
+define <16 x i8> @blend5(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend5
+; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <16 x i8> %1
+}
+
+; Mask bytes 0-1 keep their own index and bytes 2-15 are -128; the expected
+; shuffle keeps indices 0..1 and uses index 16 for the remaining positions.
+define <16 x i8> @blend6(<16 x i8> %InVec) {
+; CHECK-LABEL: @blend6
+; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <16 x i8> %1
+}
+
+; 256-bit variant of the even/odd blend: the same 16-byte mask (-128 at even
+; positions) is written for both halves. The expected shuffle uses index 32
+; for -128 positions in the low half and 48 in the high half; kept bytes in
+; the high half appear as 17, 19, ..., 31.
+define <32 x i8> @blend1_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend1_avx2
+; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 48, i32 17, i32 48, i32 19, i32 48, i32 21, i32 48, i32 23, i32 48, i32 25, i32 48, i32 27, i32 48, i32 29, i32 48, i32 31>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
+ ret <32 x i8> %1
+}
+
+; 256-bit variant: per 16-byte half, the first two bytes of every 4-byte group
+; are -128. The expected shuffle uses index 32 (low half) / 48 (high half) for
+; those positions; kept bytes in the high half are offset by 16.
+define <32 x i8> @blend2_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend2_avx2
+; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 32, i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 48, i32 48, i32 18, i32 19, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 26, i32 27, i32 48, i32 48, i32 30, i32 31>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+; 256-bit variant: per 16-byte half, the first four bytes of each 8-byte group
+; are -128. The expected shuffle uses index 32 (low half) / 48 (high half) for
+; those positions; kept bytes in the high half are offset by 16.
+define <32 x i8> @blend3_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend3_avx2
+; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 20, i32 21, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 28, i32 29, i32 30, i32 31>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+; 256-bit variant: per 16-byte half, bytes 0-7 of the mask are -128 and bytes
+; 8-15 keep their index. The expected shuffle uses index 32 (low half) / 48
+; (high half) for the -128 positions and 8..15 / 24..31 for the kept bytes.
+define <32 x i8> @blend4_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend4_avx2
+; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+; 256-bit variant: per 16-byte half, mask bytes 0-3 keep their index and the
+; other twelve are -128. The expected shuffle keeps 0..3 and 16..19 and uses
+; index 32 (low half) / 48 (high half) everywhere else.
+define <32 x i8> @blend5_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend5_avx2
+; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <32 x i8> %1
+}
+
+; 256-bit variant: per 16-byte half, mask bytes 0-1 keep their index and the
+; other fourteen are -128. The expected shuffle keeps 0..1 and 16..17 and uses
+; index 32 (low half) / 48 (high half) everywhere else.
+define <32 x i8> @blend6_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @blend6_avx2
+; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <32 x i8> %1
+}
+
+; movq idiom: the mask keeps bytes 0-7 of %InVec and has -128 for bytes 8-15.
+; The expected shuffle takes index 16 for those upper bytes, which is element
+; 0 (i8 0) of the constant second operand.
+define <16 x i8> @movq_idiom(<16 x i8> %InVec) {
+; CHECK-LABEL: @movq_idiom
+; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <16 x i8> %1
+}
+
+; 256-bit movq idiom: the mask keeps bytes 0-7 and has -128 for the other 24
+; bytes. The expected shuffle uses index 32 for bytes 8-15 and index 48 for
+; the whole high half; elements 0 and 16 of the second operand are i8 0.
+define <32 x i8> @movq_idiom_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @movq_idiom_avx2
+; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
+ ret <32 x i8> %1
+}
+
+; Vector permutations using byte shuffles.
+
+; The mask repeats bytes 4-7 twice, then bytes 12-15 twice. The expected
+; output is a single-input shufflevector (second operand undef) with the same
+; indices as the mask.
+define <16 x i8> @permute1(<16 x i8> %InVec) {
+; CHECK-LABEL: @permute1
+; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
+ ret <16 x i8> %1
+}
+
+; The mask is bytes 0-7 written twice, duplicating the low 8 bytes into the
+; high 8. The expected output is a single-input shufflevector with the same
+; indices as the mask.
+define <16 x i8> @permute2(<16 x i8> %InVec) {
+; CHECK-LABEL: @permute2
+; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
+ ret <16 x i8> %1
+}
+
+; 256-bit variant: the 16-byte mask <4-7, 4-7, 12-15, 12-15> is written for
+; both halves. In the expected shufflevector the low-half indices equal the
+; mask bytes and the high-half indices are the mask bytes plus 16.
+define <32 x i8> @permute1_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @permute1_avx2
+; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
+ ret <32 x i8> %1
+}
+
+; 256-bit variant: the mask is bytes 0-7 written four times. In the expected
+; shufflevector the low half uses 0..7 twice and the high half uses 16..23
+; twice, i.e. the mask bytes plus 16.
+define <32 x i8> @permute2_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @permute2_avx2
+; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
+ ret <32 x i8> %1
+}
+
+; Test that instcombine correctly folds a pshufb with values that
+; are not -128 and that are not encoded in four bits.
+
+; The mask bytes are 16..31: non-negative, but each larger than 15. Each one
+; modulo 16 equals its own position, and the function is expected to return
+; %InVec directly.
+define <16 x i8> @identity_test2_2(<16 x i8> %InVec) {
+; CHECK-LABEL: @identity_test2_2
+; CHECK: ret <16 x i8> %InVec
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
+ ret <16 x i8> %1
+}
+
+; 256-bit variant: every mask byte is non-negative and larger than 15, and
+; each one modulo 16 equals its position within its 16-byte half. The function
+; is expected to return %InVec directly.
+define <32 x i8> @identity_test_avx2_2(<32 x i8> %InVec) {
+; CHECK-LABEL: @identity_test_avx2_2
+; CHECK: ret <32 x i8> %InVec
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 16, i8 33, i8 66, i8 19, i8 36, i8 69, i8 22, i8 39, i8 72, i8 25, i8 42, i8 75, i8 28, i8 45, i8 78, i8 31, i8 48, i8 81, i8 34, i8 51, i8 84, i8 37, i8 54, i8 87, i8 40, i8 57, i8 90, i8 43, i8 60, i8 93, i8 46, i8 63>)
+ ret <32 x i8> %1
+}
+
+; Every mask byte is negative but none of them is -128; the expected result
+; is still the constant zeroinitializer.
+define <16 x i8> @fold_to_zero_vector_2(<16 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_2
+; CHECK: ret <16 x i8> zeroinitializer
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -125, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15>)
+ ret <16 x i8> %1
+}
+
+; 256-bit variant: all 32 mask bytes are negative but none of them is -128;
+; the expected result is still the constant zeroinitializer.
+define <32 x i8> @fold_to_zero_vector_avx2_2(<32 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_avx2_2
+; CHECK: ret <32 x i8> zeroinitializer
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -127, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15, i8 -126, i8 -2, i8 -52, i8 -31, i8 -5, i8 -8, i8 -34, i8 -67, i8 -100, i8 -119, i8 -101, i8 -23, i8 -16, i8 -2, i8 -12, i8 -16>)
+ ret <32 x i8> %1
+}
+
+; Every mask byte is non-negative and larger than 15. Each expected shuffle
+; index equals the corresponding mask byte modulo 16, giving bytes 0-7 of
+; %InVec twice.
+define <16 x i8> @permute3(<16 x i8> %InVec) {
+; CHECK-LABEL: @permute3
+; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 48, i8 17, i8 34, i8 51, i8 20, i8 37, i8 54, i8 23, i8 16, i8 49, i8 66, i8 19, i8 52, i8 69, i8 22, i8 55>)
+ ret <16 x i8> %1
+}
+
+; 256-bit variant: every mask byte is non-negative and larger than 15. Each
+; expected shuffle index equals the mask byte modulo 16 in the low half, and
+; that value plus 16 in the high half.
+define <32 x i8> @permute3_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @permute3_avx2
+; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
+
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 52, i8 21, i8 38, i8 55, i8 20, i8 37, i8 54, i8 23, i8 28, i8 61, i8 78, i8 31, i8 60, i8 29, i8 30, i8 79, i8 52, i8 21, i8 38, i8 55, i8 20, i8 53, i8 102, i8 23, i8 92, i8 93, i8 94, i8 95, i8 108, i8 109, i8 110, i8 111>)
+ ret <32 x i8> %1
+}
+
+
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
+declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
diff --git a/test/Transforms/InstCombine/x86-sse4a.ll b/test/Transforms/InstCombine/x86-sse4a.ll
new file mode 100644
index 000000000000..815d26bd2254
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-sse4a.ll
@@ -0,0 +1,336 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;
+; EXTRQ
+;
+
+; Baseline: both extrq operands are plain function arguments, and the call to
+; llvm.x86.sse4a.extrq is expected to remain in the output.
+define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_call
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
+ ret <2 x i64> %1
+}
+
+; The first extrq operand is zeroinitializer (control vector %y is unknown);
+; the expected result is the constant <i64 0, i64 undef>.
+define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_zero_arg0
+; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
+ ret <2 x i64> %1
+}
+
+; The extrq control vector is zeroinitializer; the function is expected to
+; return %x unchanged.
+define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_zero_arg1
+; CHECK-NEXT: ret <2 x i64> %x
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
+ ret <2 x i64> %1
+}
+
+; The control vector is a constant whose first two bytes are 8 and 15. The
+; expected output is the immediate form, llvm.x86.sse4a.extrqi(%x, i8 8, i8 15).
+define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_to_extqi
+; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
+ ret <2 x i64> %1
+}
+
+; Both extrq operands are constants (<i64 -1, i64 55> with control bytes 8 and
+; 15); the call is expected to fold to <i64 255, i64 undef>.
+define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_constant
+; CHECK-NEXT: ret <2 x i64> <i64 255, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
+ ret <2 x i64> %1
+}
+
+; Constant operands where the upper i64 of the source is undef (<i64 -1, i64
+; undef>, control bytes 16 and 15); expected to fold to <i64 65535, i64 undef>.
+define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_constant_undef
+; CHECK-NEXT: ret <2 x i64> <i64 65535, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
+ ret <2 x i64> %1
+}
+
+;
+; EXTRQI
+;
+
+; Baseline: extrqi with immediates 8 and 23 on a non-constant input; the call
+; is expected to remain in the output unchanged.
+define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_call
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
+ ret <2 x i64> %1
+}
+
+; extrqi with immediates 32 and 32. The expected output is a byte shuffle
+; wrapped in <16 x i8> bitcasts: bytes 4-7 of %x, then four zero bytes
+; (indices 20-23 of the constant operand), with the upper eight bytes undef.
+define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_shuffle_1zuu
+; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
+; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %3
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
+ ret <2 x i64> %1
+}
+
+; extrqi with immediates 8 and 16. The expected output is a byte shuffle
+; wrapped in <16 x i8> bitcasts: byte 2 of %x, then seven zero bytes (indices
+; 17-23 of the constant operand), with the upper eight bytes undef.
+define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu
+; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
+; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %3
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
+ ret <2 x i64> %1
+}
+
+; extrqi with immediates 32 and 33 (their sum, 65, is above 64) on a zero
+; vector; the whole result is expected to fold to undef.
+define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_undef
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
+ ret <2 x i64> %1
+}
+
+; extrqi with immediates 3 and 18 on a zero vector; expected to fold to the
+; constant <i64 0, i64 undef>.
+define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_zero
+; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
+ ret <2 x i64> %1
+}
+
+; extrqi with immediates 3 and 18 on the constant <i64 -1, i64 55>; expected
+; to fold to <i64 7, i64 undef> (three set bits).
+define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_constant
+; CHECK-NEXT: ret <2 x i64> <i64 7, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
+ ret <2 x i64> %1
+}
+
+; extrqi with immediates 4 and 18 on <i64 -1, i64 undef>; expected to fold to
+; <i64 15, i64 undef> (four set bits) despite the undef upper element.
+define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_constant_undef
+; CHECK-NEXT: ret <2 x i64> <i64 15, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
+ ret <2 x i64> %1
+}
+
+;
+; INSERTQ
+;
+
+; Baseline: both insertq operands are plain function arguments, and the call
+; to llvm.x86.sse4a.insertq is expected to remain in the output.
+define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_call
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
+ ret <2 x i64> %1
+}
+
+; The second insertq operand is the constant <i64 8, i64 658> (658 = 0x292).
+; The expected output is the immediate form: insertqi(%x, <i64 8, i64 undef>,
+; i8 18, i8 2), with the upper element of the constant replaced by undef.
+define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_to_insertqi
+; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
+ ret <2 x i64> %1
+}
+
+; Both insertq operands are constants (<i64 0, i64 0> and <i64 8, i64 658>);
+; the call is expected to fold to <i64 32, i64 undef>.
+define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_constant
+; CHECK-NEXT: ret <2 x i64> <i64 32, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
+ ret <2 x i64> %1
+}
+
+; Constant operands where the upper i64 of the first is undef (<i64 1, i64
+; undef> and <i64 8, i64 658>); expected to fold to <i64 33, i64 undef>.
+define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_constant_undef
+; CHECK-NEXT: ret <2 x i64> <i64 33, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
+ ret <2 x i64> %1
+}
+
+;
+; INSERTQI
+;
+
+; insertqi with immediates 32 and 32, wrapped in <16 x i8> bitcasts. The
+; expected output is a single shufflevector with no bitcasts: bytes 0-3 of %v,
+; then bytes 0-3 of %i (indices 16-19), with the upper eight bytes undef.
+define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
+; CHECK-LABEL: @test_insertqi_shuffle_04uu
+; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: ret <16 x i8> %1
+ %1 = bitcast <16 x i8> %v to <2 x i64>
+ %2 = bitcast <16 x i8> %i to <2 x i64>
+ %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
+ %4 = bitcast <2 x i64> %3 to <16 x i8>
+ ret <16 x i8> %4
+}
+
+; insertqi with immediates 16 and 0, wrapped in <16 x i8> bitcasts. The
+; expected output is a single shufflevector with no bitcasts: bytes 0-1 of %i
+; (indices 16-17), then bytes 2-7 of %v, with the upper eight bytes undef.
+define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
+; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu
+; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: ret <16 x i8> %1
+ %1 = bitcast <16 x i8> %v to <2 x i64>
+ %2 = bitcast <16 x i8> %i to <2 x i64>
+ %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
+ %4 = bitcast <2 x i64> %3 to <16 x i8>
+ ret <16 x i8> %4
+}
+
+; insertqi on constant vectors (<i64 -1, i64 -1> and <i64 8, i64 0>) with
+; immediates 16 and 1; expected to fold to <i64 -131055, i64 undef>.
+define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @test_insertqi_constant
+; CHECK-NEXT: ret <2 x i64> <i64 -131055, i64 undef>
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
+ ret <2 x i64> %1
+}
+
+; The result of this insert is the second arg, since the top 64 bits of
+; the result are undefined, and we copy the bottom 64 bits from the
+; second arg (the insertqi immediates below are 64 and 0).
+define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testInsert64Bits
+; CHECK-NEXT: ret <2 x i64> %i
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
+ ret <2 x i64> %1
+}
+
+; insertqi with both immediates 0; the function is expected to return %i,
+; the same outcome as the 64/0 case in testInsert64Bits.
+define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testZeroLength
+; CHECK-NEXT: ret <2 x i64> %i
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) { ; immediates (0, 16): expected to fold to undef
+; CHECK-LABEL: @testUndefinedInsertq_1
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) { ; immediates (48, 32), which sum past 64: expected to fold to undef
+; CHECK-LABEL: @testUndefinedInsertq_2
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) { ; immediates (64, 16), which sum past 64: expected to fold to undef
+; CHECK-LABEL: @testUndefinedInsertq_3
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
+ ret <2 x i64> %1
+}
+
+;
+; Vector Demanded Bits
+;
+
+define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) { ; arg 0 is %x with element 0 copied over element 1: expected call takes %x directly
+; CHECK-LABEL: @test_extrq_arg0
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) { ; arg 1 is %y with bytes 8-15 replaced by byte 0: expected call takes %y directly
+; CHECK-LABEL: @test_extrq_arg1
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) { ; both operands pass through shuffles that only alter their upper halves: expected call takes %x and %y directly
+; CHECK-LABEL: @test_extrq_args01
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) { ; only element 1 of the call result is used: expected to fold to undef
+; CHECK-LABEL: @test_extrq_ret
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) { ; arg 0 is %x with element 0 copied over element 1: expected call takes %x directly
+; CHECK-LABEL: @test_extrqi_arg0
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrqi_ret(<2 x i64> %x) { ; only element 1 of the call result is used: expected to fold to undef
+; CHECK-LABEL: @test_extrqi_ret
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) { ; arg 0 is %x with element 0 copied over element 1: expected call takes %x directly
+; CHECK-LABEL: @test_insertq_arg0
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) { ; only element 1 of the call result is used: expected to fold to undef
+; CHECK-LABEL: @test_insertq_ret
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) { ; arg 0 is %x with element 0 copied over element 1: expected call takes %x directly
+; CHECK-LABEL: @test_insertqi_arg0
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) { ; arg 1 is %y with element 0 copied over element 1: expected call takes %y directly
+; CHECK-LABEL: @test_insertqi_arg1
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) { ; both operands have element 0 copied over element 1: expected call takes %x and %y directly
+; CHECK-LABEL: @test_insertqi_args01
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) { ; only element 1 of the call result is used: expected to fold to undef
+; CHECK-LABEL: @test_insertqi_ret
+; CHECK-NEXT: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %2
+}
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq
+declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi
+declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq
+declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
+declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
diff --git a/test/Transforms/InstCombine/x86-vector-shifts.ll b/test/Transforms/InstCombine/x86-vector-shifts.ll
new file mode 100644
index 000000000000..59e445a40bef
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-vector-shifts.ll
@@ -0,0 +1,1318 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+;
+; ASHR - Immediate
+;
+
+define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @sse2_psrai_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) { ; immediate count 15: expected to become a generic ashr by 15
+; CHECK-LABEL: @sse2_psrai_w_15
+; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) { ; immediate count 64 exceeds the i16 lane width: expected ashr by 15
+; CHECK-LABEL: @sse2_psrai_w_64
+; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @sse2_psrai_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) { ; immediate count 15: expected to become a generic ashr by 15
+; CHECK-LABEL: @sse2_psrai_d_15
+; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) { ; immediate count 64 exceeds the i32 lane width: expected ashr by 31
+; CHECK-LABEL: @sse2_psrai_d_64
+; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
+ ret <4 x i32> %1
+}
+
+define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @avx2_psrai_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) { ; immediate count 15: expected to become a generic ashr by 15
+; CHECK-LABEL: @avx2_psrai_w_15
+; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) { ; immediate count 64 exceeds the i16 lane width: expected ashr by 15
+; CHECK-LABEL: @avx2_psrai_w_64
+; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @avx2_psrai_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) { ; immediate count 15: expected to become a generic ashr by 15
+; CHECK-LABEL: @avx2_psrai_d_15
+; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) { ; immediate count 64 exceeds the i32 lane width: expected ashr by 31
+; CHECK-LABEL: @avx2_psrai_d_64
+; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
+ ret <8 x i32> %1
+}
+
+;
+; LSHR - Immediate
+;
+
+define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @sse2_psrli_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) { ; immediate count 15: expected to become a generic lshr by 15
+; CHECK-LABEL: @sse2_psrli_w_15
+; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) { ; immediate count 64 exceeds the i16 lane width: expected all-zero result
+; CHECK-LABEL: @sse2_psrli_w_64
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @sse2_psrli_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) { ; immediate count 15: expected to become a generic lshr by 15
+; CHECK-LABEL: @sse2_psrli_d_15
+; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) { ; immediate count 64 exceeds the i32 lane width: expected all-zero result
+; CHECK-LABEL: @sse2_psrli_d_64
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @sse2_psrli_q_0
+; CHECK-NEXT: ret <2 x i64> %v
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) { ; immediate count 15: expected to become a generic lshr by 15
+; CHECK-LABEL: @sse2_psrli_q_15
+; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) { ; immediate count 64 equals the i64 lane width: expected all-zero result
+; CHECK-LABEL: @sse2_psrli_q_64
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
+ ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @avx2_psrli_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) { ; immediate count 15: expected to become a generic lshr by 15
+; CHECK-LABEL: @avx2_psrli_w_15
+; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) { ; immediate count 64 exceeds the i16 lane width: expected all-zero result
+; CHECK-LABEL: @avx2_psrli_w_64
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @avx2_psrli_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) { ; immediate count 15: expected to become a generic lshr by 15
+; CHECK-LABEL: @avx2_psrli_d_15
+; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) { ; immediate count 64 exceeds the i32 lane width: expected all-zero result
+; CHECK-LABEL: @avx2_psrli_d_64
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
+ ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @avx2_psrli_q_0
+; CHECK-NEXT: ret <4 x i64> %v
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) { ; immediate count 15: expected to become a generic lshr by 15
+; CHECK-LABEL: @avx2_psrli_q_15
+; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) { ; immediate count 64 equals the i64 lane width: expected all-zero result
+; CHECK-LABEL: @avx2_psrli_q_64
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
+ ret <4 x i64> %1
+}
+
+;
+; SHL - Immediate
+;
+
+define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @sse2_pslli_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) { ; immediate count 15: expected to become a generic shl by 15
+; CHECK-LABEL: @sse2_pslli_w_15
+; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) { ; immediate count 64 exceeds the i16 lane width: expected all-zero result
+; CHECK-LABEL: @sse2_pslli_w_64
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @sse2_pslli_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) { ; immediate count 15: expected to become a generic shl by 15
+; CHECK-LABEL: @sse2_pslli_d_15
+; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) { ; immediate count 64 exceeds the i32 lane width: expected all-zero result
+; CHECK-LABEL: @sse2_pslli_d_64
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @sse2_pslli_q_0
+; CHECK-NEXT: ret <2 x i64> %v
+ %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) { ; immediate count 15: expected to become a generic shl by 15
+; CHECK-LABEL: @sse2_pslli_q_15
+; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) { ; immediate count 64 equals the i64 lane width: expected all-zero result
+; CHECK-LABEL: @sse2_pslli_q_64
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+ %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
+ ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @avx2_pslli_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) { ; immediate count 15: expected to become a generic shl by 15
+; CHECK-LABEL: @avx2_pslli_w_15
+; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) { ; immediate count 64 exceeds the i16 lane width: expected all-zero result
+; CHECK-LABEL: @avx2_pslli_w_64
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @avx2_pslli_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) { ; immediate count 15: expected to become a generic shl by 15
+; CHECK-LABEL: @avx2_pslli_d_15
+; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) { ; immediate count 64 exceeds the i32 lane width: expected all-zero result
+; CHECK-LABEL: @avx2_pslli_d_64
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
+ ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) { ; immediate count 0: expected to fold to %v
+; CHECK-LABEL: @avx2_pslli_q_0
+; CHECK-NEXT: ret <4 x i64> %v
+ %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) { ; immediate count 15: expected to become a generic shl by 15
+; CHECK-LABEL: @avx2_pslli_q_15
+; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
+ %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) { ; immediate count 64 equals the i64 lane width: expected all-zero result
+; CHECK-LABEL: @avx2_pslli_q_64
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+ %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
+ ret <4 x i64> %1
+}
+
+;
+; ASHR - Constant Vector
+;
+
+define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) { ; all-zero count vector: expected to fold to %v
+; CHECK-LABEL: @sse2_psra_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) { ; count vector <15, 0, 0, 0, 9999 x 4>: expected ashr by 15, so the 9999 elements do not affect the result
+; CHECK-LABEL: @sse2_psra_w_15
+; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) { ; every count element is 15: expected ashr by 15
+; CHECK-LABEL: @sse2_psra_w_15_splat
+; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) { ; count vector <64, 0, 0, 0, 9999 x 4>, 64 exceeding the i16 lane width: expected ashr by 15
+; CHECK-LABEL: @sse2_psra_w_64
+; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) { ; all-zero count vector: expected to fold to %v
+; CHECK-LABEL: @sse2_psra_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) { ; count vector <15, 0, 9999, 9999>: expected ashr by 15, so the 9999 elements do not affect the result
+; CHECK-LABEL: @sse2_psra_d_15
+; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) { ; every count element is 15, yet the expected amount is 31 (presumably the low elements combine into one larger count - verify)
+; CHECK-LABEL: @sse2_psra_d_15_splat
+; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) { ; count vector <64, 0, 9999, 9999>, 64 exceeding the i32 lane width: expected ashr by 31
+; CHECK-LABEL: @sse2_psra_d_64
+; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) { ; all-zero count vector: expected to fold to %v
+; CHECK-LABEL: @avx2_psra_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) { ; count vector <15, 0, 0, 0, 9999 x 4>: expected ashr by 15, so the 9999 elements do not affect the result
+; CHECK-LABEL: @avx2_psra_w_15
+; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) { ; every count element is 15: expected ashr by 15
+; CHECK-LABEL: @avx2_psra_w_15_splat
+; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) { ; count vector <64, 0, 0, 0, 9999 x 4>, 64 exceeding the i16 lane width: expected ashr by 15
+; CHECK-LABEL: @avx2_psra_w_64
+; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) { ; all-zero count vector: expected to fold to %v
+; CHECK-LABEL: @avx2_psra_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) { ; count vector <15, 0, 9999, 9999>: expected ashr by 15, so the 9999 elements do not affect the result
+; CHECK-LABEL: @avx2_psra_d_15
+; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) { ; every count element is 15, yet the expected amount is 31 (presumably the low elements combine into one larger count - verify)
+; CHECK-LABEL: @avx2_psra_d_15_splat
+; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) { ; count vector <64, 0, 9999, 9999>, 64 exceeding the i32 lane width: expected ashr by 31
+; CHECK-LABEL: @avx2_psra_d_64
+; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+;
+; LSHR - Constant Vector
+;
+
+define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) { ; all-zero count vector: expected to fold to %v
+; CHECK-LABEL: @sse2_psrl_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) { ; count vector <15, 0, 0, 0, 9999 x 4>: expected lshr by 15, so the 9999 elements do not affect the result
+; CHECK-LABEL: @sse2_psrl_w_15
+; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) { ; every count element is 15, yet the expected result is all zeros (presumably the low elements combine into one out-of-range count - verify)
+; CHECK-LABEL: @sse2_psrl_w_15_splat
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) { ; count vector <64, 0, 0, 0, 9999 x 4>, 64 exceeding the i16 lane width: expected all-zero result
+; CHECK-LABEL: @sse2_psrl_w_64
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) { ; all-zero count vector: expected to fold to %v
+; CHECK-LABEL: @sse2_psrl_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) { ; count vector <15, 0, 9999, 9999>: expected lshr by 15, so the 9999 elements do not affect the result
+; CHECK-LABEL: @sse2_psrl_d_15
+; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) { ; every count element is 15, yet the expected result is all zeros (presumably the low elements combine into one out-of-range count - verify)
+; CHECK-LABEL: @sse2_psrl_d_15_splat
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) { ; count vector <64, 0, 9999, 9999>, 64 exceeding the i32 lane width: expected all-zero result
+; CHECK-LABEL: @sse2_psrl_d_64
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) { ; all-zero count vector: expected to fold to %v
+; CHECK-LABEL: @sse2_psrl_q_0
+; CHECK-NEXT: ret <2 x i64> %v
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) { ; count vector <15, 9999>: expected lshr by 15, so the 9999 element does not affect the result
+; CHECK-LABEL: @sse2_psrl_q_15
+; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) { ; count vector <64, 9999>, 64 equalling the i64 lane width: expected all-zero result
+; CHECK-LABEL: @sse2_psrl_q_64
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) { ; all-zero count vector: expected to fold to %v
+; CHECK-LABEL: @avx2_psrl_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) { ; count vector <15, 0, 0, 0, 9999 x 4>: expected lshr by 15, so the 9999 elements do not affect the result
+; CHECK-LABEL: @avx2_psrl_w_15
+; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) { ; every count element is 15, yet the expected result is all zeros (presumably the low elements combine into one out-of-range count - verify)
+; CHECK-LABEL: @avx2_psrl_w_15_splat
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) { ; count vector <64, 0, 0, 0, 9999 x 4>, 64 exceeding the i16 lane width: expected all-zero result
+; CHECK-LABEL: @avx2_psrl_w_64
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) { ; all-zero count vector: expected to fold to %v
+; CHECK-LABEL: @avx2_psrl_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) { ; count vector <15, 0, 9999, 9999>: expected lshr by 15, so the 9999 elements do not affect the result
+; CHECK-LABEL: @avx2_psrl_d_15
+; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) { ; every count element is 15, yet the expected result is all zeros (presumably the low elements combine into one out-of-range count - verify)
+; CHECK-LABEL: @avx2_psrl_d_15_splat
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) { ; count vector <64, 0, 9999, 9999>, 64 exceeding the i32 lane width: expected all-zero result
+; CHECK-LABEL: @avx2_psrl_d_64
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) { ; all-zero count vector: expected to fold to %v
+; CHECK-LABEL: @avx2_psrl_q_0
+; CHECK-NEXT: ret <4 x i64> %v
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) { ; count vector <15, 9999>: expected lshr by 15, so the 9999 element does not affect the result
+; CHECK-LABEL: @avx2_psrl_q_15
+; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) { ; count vector <64, 9999>, 64 equalling the i64 lane width: expected all-zero result
+; CHECK-LABEL: @avx2_psrl_q_64
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <4 x i64> %1
+}
+
+;
+; SHL - Constant Vector
+;
+
+define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) { ; all-zero count vector: expected to fold to %v
+; CHECK-LABEL: @sse2_psll_w_0
+; CHECK-NEXT: ret <8 x i16> %v
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) { ; count vector <15, 0, 0, 0, 9999 x 4>: expected shl by 15, so the 9999 elements do not affect the result
+; CHECK-LABEL: @sse2_psll_w_15
+; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) { ; every count element is 15, yet the expected result is all zeros (presumably the low elements combine into one out-of-range count - verify)
+; CHECK-LABEL: @sse2_psll_w_15_splat
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) { ; count vector <64, 0, 0, 0, 9999 x 4>, 64 exceeding the i16 lane width: expected all-zero result
+; CHECK-LABEL: @sse2_psll_w_64
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_0
+; CHECK-NEXT: ret <4 x i32> %v
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_15
+; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_15_splat
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_64
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psll_q_0
+; CHECK-NEXT: ret <2 x i64> %v
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psll_q_15
+; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psll_q_64
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+ %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_0
+; CHECK-NEXT: ret <16 x i16> %v
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_15
+; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_15_splat
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_64
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_0
+; CHECK-NEXT: ret <8 x i32> %v
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_15
+; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_15_splat
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_64
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+ ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psll_q_0
+; CHECK-NEXT: ret <4 x i64> %v
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psll_q_15
+; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> %1
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psll_q_64
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+ ret <4 x i64> %1
+}
+
+;
+; Vector Demanded Bits - only the low 64 bits of the shift-count operand are demanded
+;
+
+define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psra_w_var
+; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
+ ret <8 x i16> %2
+}
+
+define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
+; CHECK-LABEL: @sse2_psra_w_var_bc
+; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <8 x i16>
+; CHECK-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
+; CHECK-NEXT: ret <8 x i16> %2
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = bitcast <2 x i64> %1 to <8 x i16>
+ %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
+ ret <8 x i16> %3
+}
+
+define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @sse2_psra_d_var
+; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psra_d_var_bc
+; CHECK-NEXT: %1 = bitcast <8 x i16> %a to <4 x i32>
+; CHECK-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
+; CHECK-NEXT: ret <4 x i32> %2
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = bitcast <8 x i16> %1 to <4 x i32>
+ %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
+ ret <4 x i32> %3
+}
+
+define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx2_psra_w_var
+; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
+ ret <16 x i16> %2
+}
+
+define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx2_psra_d_var
+; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psrl_w_var
+; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @sse2_psrl_d_var
+; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @sse2_psrl_q_var
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
+ ret <2 x i64> %2
+}
+
+define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx2_psrl_w_var
+; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
+ ret <16 x i16> %2
+}
+
+define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
+; CHECK-LABEL: @avx2_psrl_w_var_bc
+; CHECK-NEXT: %1 = bitcast <16 x i8> %a to <8 x i16>
+; CHECK-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
+; CHECK-NEXT: ret <16 x i16> %2
+ %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %2 = bitcast <16 x i8> %1 to <8 x i16>
+ %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
+ ret <16 x i16> %3
+}
+
+define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx2_psrl_d_var
+; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx2_psrl_d_var_bc
+; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <4 x i32>
+; CHECK-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
+; CHECK-NEXT: ret <8 x i32> %2
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = bitcast <2 x i64> %1 to <4 x i32>
+ %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
+ ret <8 x i32> %3
+}
+
+define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx2_psrl_q_var
+; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <4 x i64> %1
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
+ ret <4 x i64> %2
+}
+
+define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psll_w_var
+; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <8 x i16> %1
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @sse2_psll_d_var
+; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <4 x i32> %1
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @sse2_psll_q_var
+; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <2 x i64> %1
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
+ ret <2 x i64> %2
+}
+
+define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx2_psll_w_var
+; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <16 x i16> %1
+ %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
+ ret <16 x i16> %2
+}
+
+define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx2_psll_d_var
+; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <8 x i32> %1
+ %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx2_psll_q_var
+; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <4 x i64> %1
+ %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+ %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
+ ret <4 x i64> %2
+}
+
+;
+; Constant Folding
+;
+
+define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
+; CHECK-LABEL: @test_sse2_psra_w_0
+; CHECK-NEXT: ret <8 x i16> %A
+ %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @test_sse2_psra_w_8() {
+; CHECK-LABEL: @test_sse2_psra_w_8
+; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+ %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
+ %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
+ %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
+ ret <8 x i16> %4
+}
+
+define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
+; CHECK-LABEL: @test_sse2_psra_d_0
+; CHECK-NEXT: ret <4 x i32> %A
+ %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>) ; low 64 bits of the count are 0, the 7 in the upper half must be ignored
+ %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0) ; chain through %2 so the psra.d fold above is actually exercised
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @sse2_psra_d_8() {
+; CHECK-LABEL: @sse2_psra_d_8
+; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+ %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
+ %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
+ %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
+ %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
+ ret <4 x i32> %4
+}
+
+define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
+; CHECK-LABEL: @test_avx2_psra_w_0
+; CHECK-NEXT: ret <16 x i16> %A
+ %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
+; CHECK-LABEL: @test_avx2_psra_w_8
+; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+ %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
+ %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
+ %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+ %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
+ ret <16 x i16> %4
+}
+
+define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
+; CHECK-LABEL: @test_avx2_psra_d_0
+; CHECK-NEXT: ret <8 x i32> %A
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
+ %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
+ ret <8 x i32> %3
+}
+
+define <8 x i32> @test_avx2_psra_d_8() {
+; CHECK-LABEL: @test_avx2_psra_d_8
+; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+ %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
+ %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
+ %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
+ ret <8 x i32> %4
+}
+
+define <2 x i64> @test_sse2_1() {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+; CHECK: test_sse2_1
+; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
+}
+
+define <4 x i64> @test_avx2_1() {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_1
+; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
+}
+
+define <2 x i64> @test_sse2_0() {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+; CHECK: test_sse2_0
+; CHECK: ret <2 x i64> zeroinitializer
+}
+
+define <4 x i64> @test_avx2_0() {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_0
+; CHECK: ret <4 x i64> zeroinitializer
+}
+define <2 x i64> @test_sse2_psrl_1() {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+; CHECK: test_sse2_psrl_1
+; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
+}
+
+define <4 x i64> @test_avx2_psrl_1() {
+ %S = bitcast i32 1 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_psrl_1
+; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
+}
+
+define <2 x i64> @test_sse2_psrl_0() {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
+ %6 = bitcast <8 x i16> %5 to <4 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <4 x i32> %8 to <2 x i64>
+ %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <2 x i64> %10 to <8 x i16>
+ %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
+ %13 = bitcast <8 x i16> %12 to <4 x i32>
+ %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
+ %15 = bitcast <4 x i32> %14 to <2 x i64>
+ %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
+ ret <2 x i64> %16
+; CHECK: test_sse2_psrl_0
+; CHECK: ret <2 x i64> zeroinitializer
+}
+
+define <4 x i64> @test_avx2_psrl_0() {
+ %S = bitcast i32 128 to i32
+ %1 = zext i32 %S to i64
+ %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+ %3 = insertelement <2 x i64> %2, i64 0, i32 1
+ %4 = bitcast <2 x i64> %3 to <8 x i16>
+ %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
+ %6 = bitcast <16 x i16> %5 to <8 x i32>
+ %7 = bitcast <2 x i64> %3 to <4 x i32>
+ %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
+ %9 = bitcast <8 x i32> %8 to <4 x i64>
+ %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
+ %11 = bitcast <4 x i64> %10 to <16 x i16>
+ %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
+ %13 = bitcast <16 x i16> %12 to <8 x i32>
+ %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
+ %15 = bitcast <8 x i32> %14 to <4 x i64>
+ %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
+ ret <4 x i64> %16
+; CHECK: test_avx2_psrl_0
+; CHECK: ret <4 x i64> zeroinitializer
+}
+
+declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
+
+declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
+
+declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
+declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
+
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/x86-xop.ll b/test/Transforms/InstCombine/x86-xop.ll
new file mode 100644
index 000000000000..176c504989df
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-xop.ll
@@ -0,0 +1,209 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_slt_v2i64
+; CHECK-NEXT: %1 = icmp slt <2 x i64> %a, %b
+; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+ %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_ult_v2i64
+; CHECK-NEXT: %1 = icmp ult <2 x i64> %a, %b
+; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+ %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_sle_v2i64
+; CHECK-NEXT: %1 = icmp sle <2 x i64> %a, %b
+; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+ %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_ule_v2i64
+; CHECK-NEXT: %1 = icmp ule <2 x i64> %a, %b
+; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> %2
+ %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %1
+}
+
+define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_sgt_v4i32
+; CHECK-NEXT: %1 = icmp sgt <4 x i32> %a, %b
+; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+ %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_ugt_v4i32
+; CHECK-NEXT: %1 = icmp ugt <4 x i32> %a, %b
+; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+ %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_sge_v4i32
+; CHECK-NEXT: %1 = icmp sge <4 x i32> %a, %b
+; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+ %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_uge_v4i32
+; CHECK-NEXT: %1 = icmp uge <4 x i32> %a, %b
+; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %2
+ %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %1
+}
+
+define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_seq_v8i16
+; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> %2
+ %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_ueq_v8i16
+; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> %2
+ %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_sne_v8i16
+; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> %2
+ %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_une_v8i16
+; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> %2
+ %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %1
+}
+
+define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_strue_v16i8
+; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_utrue_v16i8
+; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_sfalse_v16i8
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+ %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_ufalse_v16i8
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+ %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %1
+}
+
+declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
diff --git a/test/Transforms/InstCombine/xor.ll b/test/Transforms/InstCombine/xor.ll
index 33d5a2a9fda5..c8debcbac226 100644
--- a/test/Transforms/InstCombine/xor.ll
+++ b/test/Transforms/InstCombine/xor.ll
@@ -63,8 +63,8 @@ define i32 @test7(i32 %A, i32 %B) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: %A1 = and i32 %A, 7
; CHECK-NEXT: %B1 = and i32 %B, 128
-; CHECK-NEXT: %C1.1 = or i32 %A1, %B1
-; CHECK-NEXT: ret i32 %C1.1
+; CHECK-NEXT: %C11 = or i32 %A1, %B1
+; CHECK-NEXT: ret i32 %C11
%A1 = and i32 %A, 7 ; <i32> [#uses=1]
%B1 = and i32 %B, 128 ; <i32> [#uses=1]
%C1 = xor i32 %A1, %B1 ; <i32> [#uses=1]
@@ -96,8 +96,8 @@ define i1 @test9(i8 %A) {
define i8 @test10(i8 %A) {
; CHECK-LABEL: @test10(
; CHECK-NEXT: %B = and i8 %A, 3
-; CHECK-NEXT: %C.1 = or i8 %B, 4
-; CHECK-NEXT: ret i8 %C.1
+; CHECK-NEXT: %C1 = or i8 %B, 4
+; CHECK-NEXT: ret i8 %C1
%B = and i8 %A, 3 ; <i8> [#uses=1]
%C = xor i8 %B, 4 ; <i8> [#uses=1]
ret i8 %C
diff --git a/test/Transforms/InstSimplify/add-mask.ll b/test/Transforms/InstSimplify/add-mask.ll
new file mode 100644
index 000000000000..1e53cc5bc7fa
--- /dev/null
+++ b/test/Transforms/InstSimplify/add-mask.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+define i1 @test(i32 %a) {
+; CHECK-LABEL: @test
+; CHECK: ret i1 false
+ %rhs = add i32 %a, -1
+ %and = and i32 %a, %rhs
+ %res = icmp eq i32 %and, 1
+ ret i1 %res
+}
+
+define i1 @test2(i32 %a) {
+; CHECK-LABEL: @test2
+; CHECK: ret i1 false
+ %rhs = add i32 %a, 1
+ %and = and i32 %a, %rhs
+ %res = icmp eq i32 %and, 1
+ ret i1 %res
+}
+
+define i1 @test3(i32 %a) {
+; CHECK-LABEL: @test3
+; CHECK: ret i1 false
+ %rhs = add i32 %a, 7
+ %and = and i32 %a, %rhs
+ %res = icmp eq i32 %and, 1
+ ret i1 %res
+}
+
+@B = external global i32
+declare void @llvm.assume(i1)
+
+; Known bits without a constant
+define i1 @test4(i32 %a) {
+; CHECK-LABEL: @test4
+; CHECK: ret i1 false
+ %b = load i32, i32* @B
+ %b.and = and i32 %b, 1
+ %b.cnd = icmp eq i32 %b.and, 1
+ call void @llvm.assume(i1 %b.cnd)
+
+ %rhs = add i32 %a, %b
+ %and = and i32 %a, %rhs
+ %res = icmp eq i32 %and, 1
+ ret i1 %res
+}
+
+; Negative test - even number
+define i1 @test5(i32 %a) {
+; CHECK-LABEL: @test5
+; CHECK: ret i1 %res
+ %rhs = add i32 %a, 2
+ %and = and i32 %a, %rhs
+ %res = icmp eq i32 %and, 1
+ ret i1 %res
+}
+
+define i1 @test6(i32 %a) {
+; CHECK-LABEL: @test6
+; CHECK: ret i1 false
+ %lhs = add i32 %a, -1
+ %and = and i32 %lhs, %a
+ %res = icmp eq i32 %and, 1
+ ret i1 %res
+}
diff --git a/test/Transforms/InstSimplify/apint-or.ll b/test/Transforms/InstSimplify/apint-or.ll
index 5d314db7133d..36844289aaf0 100644
--- a/test/Transforms/InstSimplify/apint-or.ll
+++ b/test/Transforms/InstSimplify/apint-or.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instsimplify -S | not grep or
+; RUN: opt < %s -instsimplify -S | FileCheck %s
; Test the case where integer BitWidth <= 64 && BitWidth % 2 != 0.
define i39 @test1(i39 %V, i39 %M) {
@@ -12,14 +12,28 @@ define i39 @test1(i39 %V, i39 %M) {
%D = and i39 %V, 274877906943
%R = or i39 %B, %D
ret i39 %R
-; CHECK-LABEL @test1
+; CHECK-LABEL: @test1
; CHECK-NEXT: and {{.*}}, -274877906944
; CHECK-NEXT: add
; CHECK-NEXT: ret
}
+define i7 @test2(i7 %X) {
+ %Y = or i7 %X, 0
+ ret i7 %Y
+; CHECK-LABEL: @test2
+; CHECK-NEXT: ret i7 %X
+}
+
+define i17 @test3(i17 %X) {
+ %Y = or i17 %X, -1
+ ret i17 %Y
+; CHECK-LABEL: @test3
+; CHECK-NEXT: ret i17 -1
+}
+
; Test the case where Integer BitWidth > 64 && BitWidth <= 1024.
-define i399 @test2(i399 %V, i399 %M) {
+define i399 @test4(i399 %V, i399 %M) {
;; If we have: ((V + N) & C1) | (V & C2)
;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
;; replace with V+N.
@@ -30,8 +44,22 @@ define i399 @test2(i399 %V, i399 %M) {
%D = and i399 %V, 274877906943
%R = or i399 %B, %D
ret i399 %R
-; CHECK-LABEL @test2
+; CHECK-LABEL: @test4
; CHECK-NEXT: and {{.*}}, 18446742974197923840
; CHECK-NEXT: add
; CHECK-NEXT: ret
}
+
+define i777 @test5(i777 %X) {
+ %Y = or i777 %X, 0
+ ret i777 %Y
+; CHECK-LABEL: @test5
+; CHECK-NEXT: ret i777 %X
+}
+
+define i117 @test6(i117 %X) {
+ %Y = or i117 %X, -1
+ ret i117 %Y
+; CHECK-LABEL: @test6
+; CHECK-NEXT: ret i117 -1
+}
diff --git a/test/Transforms/InstSimplify/bswap.ll b/test/Transforms/InstSimplify/bswap.ll
new file mode 100644
index 000000000000..7bc3af9e307f
--- /dev/null
+++ b/test/Transforms/InstSimplify/bswap.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -S -instsimplify | FileCheck %s
+
+declare i16 @llvm.bswap.i16(i16)
+
+define i1 @test1(i16 %arg) {
+; CHECK-LABEL: @test1
+; CHECK: ret i1 false
+ %a = or i16 %arg, 1
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %res = icmp eq i16 %b, 0
+ ret i1 %res
+}
+
+define i1 @test2(i16 %arg) {
+; CHECK-LABEL: @test2
+; CHECK: ret i1 false
+ %a = or i16 %arg, 1024
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %res = icmp eq i16 %b, 0
+ ret i1 %res
+}
+
+define i1 @test3(i16 %arg) {
+; CHECK-LABEL: @test3
+; CHECK: ret i1 false
+ %a = and i16 %arg, 1
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 1
+ %res = icmp eq i16 %and, 1
+ ret i1 %res
+}
+
+define i1 @test4(i16 %arg) {
+; CHECK-LABEL: @test4
+; CHECK: ret i1 false
+ %a = and i16 %arg, 511
+ %b = call i16 @llvm.bswap.i16(i16 %a)
+ %and = and i16 %b, 256
+ %res = icmp eq i16 %and, 1
+ ret i1 %res
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 07c90d8f1eb8..6e66fbfede9f 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -1164,3 +1164,11 @@ define i1 @tautological8(i32 %A, i32 %B) {
; CHECK-LABEL: @tautological8(
; CHECK: ret i1 false
}
+
+define i1 @tautological9(i32 %x) {
+ %add = add nuw i32 %x, 13
+ %cmp = icmp ne i32 %add, 12
+ ret i1 %cmp
+; CHECK-LABEL: @tautological9(
+; CHECK: ret i1 true
+}
diff --git a/test/Transforms/InstSimplify/implies.ll b/test/Transforms/InstSimplify/implies.ll
new file mode 100644
index 000000000000..2e3c9591b079
--- /dev/null
+++ b/test/Transforms/InstSimplify/implies.ll
@@ -0,0 +1,217 @@
+; RUN: opt -S %s -instsimplify | FileCheck %s
+
+; A ==> A -> true
+define i1 @test(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test
+; CHECK: ret i1 true
+ %var29 = icmp slt i32 %i, %length.i
+ %res = icmp uge i1 %var29, %var29
+ ret i1 %res
+}
+
+; i +_{nsw} C_{>0} <s L ==> i <s L -> true
+define i1 @test2(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test2
+; CHECK: ret i1 true
+ %iplus1 = add nsw i32 %i, 1
+ %var29 = icmp slt i32 %i, %length.i
+ %var30 = icmp slt i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+; i + C_{>0} <s L ==> i <s L -> unknown without the nsw
+define i1 @test2_neg(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test2_neg
+; CHECK: ret i1 %res
+ %iplus1 = add i32 %i, 1
+ %var29 = icmp slt i32 %i, %length.i
+ %var30 = icmp slt i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+; sle is not implication
+define i1 @test2_neg2(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test2_neg2
+; CHECK: ret i1 %res
+ %iplus1 = add i32 %i, 1
+ %var29 = icmp slt i32 %i, %length.i
+ %var30 = icmp slt i32 %iplus1, %length.i
+ %res = icmp sle i1 %var30, %var29
+ ret i1 %res
+}
+
+; The binary operator has to be an add
+define i1 @test2_neg3(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test2_neg3
+; CHECK: ret i1 %res
+ %iplus1 = sub nsw i32 %i, 1
+ %var29 = icmp slt i32 %i, %length.i
+ %var30 = icmp slt i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+; i +_{nsw} C_{>0} <s L ==> i <s L -> true
+; With an inverted conditional (uge B A rather than canonical ule A B)
+define i1 @test3(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test3
+; CHECK: ret i1 true
+ %iplus1 = add nsw i32 %i, 1
+ %var29 = icmp slt i32 %i, %length.i
+ %var30 = icmp slt i32 %iplus1, %length.i
+ %res = icmp uge i1 %var29, %var30
+ ret i1 %res
+}
+
+; i +_{nuw} C <u L ==> i <u L
+define i1 @test4(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test4
+; CHECK: ret i1 true
+ %iplus1 = add nuw i32 %i, 1
+ %var29 = icmp ult i32 %i, %length.i
+ %var30 = icmp ult i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+; A ==> A for vectors
+define <4 x i1> @test5(<4 x i1> %vec) {
+; CHECK-LABEL: @test5
+; CHECK: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+ %res = icmp ule <4 x i1> %vec, %vec
+ ret <4 x i1> %res
+}
+
+; Don't crash on vector inputs - pr25040
+define <4 x i1> @test6(<4 x i1> %a, <4 x i1> %b) {
+; CHECK-LABEL: @test6
+; CHECK: ret <4 x i1> %res
+ %res = icmp ule <4 x i1> %a, %b
+ ret <4 x i1> %res
+}
+
+; i +_{nsw} 1 <s L ==> i <s L +_{nsw} 1
+define i1 @test7(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test7(
+; CHECK: ret i1 true
+ %iplus1 = add nsw i32 %i, 1
+ %len.plus.one = add nsw i32 %length.i, 1
+ %var29 = icmp slt i32 %i, %len.plus.one
+ %var30 = icmp slt i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+; i +_{nuw} 1 <u L ==> i <u L +_{nuw} 1
+define i1 @test8(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test8(
+; CHECK: ret i1 true
+ %iplus1 = add nuw i32 %i, 1
+ %len.plus.one = add nuw i32 %length.i, 1
+ %var29 = icmp ult i32 %i, %len.plus.one
+ %var30 = icmp ult i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+; i +_{nuw} C <u L ==> i <u L, even if C is negative
+define i1 @test9(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test9(
+; CHECK: ret i1 true
+ %iplus1 = add nuw i32 %i, -100
+ %var29 = icmp ult i32 %i, %length.i
+ %var30 = icmp ult i32 %iplus1, %length.i
+ %res = icmp ule i1 %var30, %var29
+ ret i1 %res
+}
+
+define i1 @test10(i32 %length.i, i32 %x.full) {
+; CHECK-LABEL: @test10(
+; CHECK: ret i1 true
+
+ %x = and i32 %x.full, 4294901760 ;; 4294901760 == 0xffff0000
+ %large = or i32 %x, 100
+ %small = or i32 %x, 90
+ %known = icmp ult i32 %large, %length.i
+ %to.prove = icmp ult i32 %small, %length.i
+ %res = icmp ule i1 %known, %to.prove
+ ret i1 %res
+}
+
+define i1 @test11(i32 %length.i, i32 %x) {
+; CHECK-LABEL: @test11(
+; CHECK: %res = icmp ule i1 %known, %to.prove
+; CHECK: ret i1 %res
+
+ %large = or i32 %x, 100
+ %small = or i32 %x, 90
+ %known = icmp ult i32 %large, %length.i
+ %to.prove = icmp ult i32 %small, %length.i
+ %res = icmp ule i1 %known, %to.prove
+ ret i1 %res
+}
+
+define i1 @test12(i32 %length.i, i32 %x.full) {
+; CHECK-LABEL: @test12(
+; CHECK: %res = icmp ule i1 %known, %to.prove
+; CHECK: ret i1 %res
+
+ %x = and i32 %x.full, 4294901760 ;; 4294901760 == 0xffff0000
+ %large = or i32 %x, 65536 ;; 65536 == 0x00010000
+ %small = or i32 %x, 90
+ %known = icmp ult i32 %large, %length.i
+ %to.prove = icmp ult i32 %small, %length.i
+ %res = icmp ule i1 %known, %to.prove
+ ret i1 %res
+}
+
+define i1 @test13(i32 %length.i, i32 %x) {
+; CHECK-LABEL: @test13(
+; CHECK: ret i1 true
+
+ %large = add nuw i32 %x, 100
+ %small = add nuw i32 %x, 90
+ %known = icmp ult i32 %large, %length.i
+ %to.prove = icmp ult i32 %small, %length.i
+ %res = icmp ule i1 %known, %to.prove
+ ret i1 %res
+}
+
+define i1 @test14(i32 %length.i, i32 %x.full) {
+; CHECK-LABEL: @test14(
+; CHECK: ret i1 true
+
+ %x = and i32 %x.full, 4294905615 ;; 4294905615 == 0xffff0f0f
+ %large = or i32 %x, 8224 ;; == 0x2020
+ %small = or i32 %x, 4112 ;; == 0x1010
+ %known = icmp ult i32 %large, %length.i
+ %to.prove = icmp ult i32 %small, %length.i
+ %res = icmp ule i1 %known, %to.prove
+ ret i1 %res
+}
+
+define i1 @test15(i32 %length.i, i32 %x) {
+; CHECK-LABEL: @test15(
+; CHECK: %res = icmp ule i1 %known, %to.prove
+; CHECK: ret i1 %res
+
+ %large = add nuw i32 %x, 100
+ %small = add nuw i32 %x, 110
+ %known = icmp ult i32 %large, %length.i
+ %to.prove = icmp ult i32 %small, %length.i
+ %res = icmp ule i1 %known, %to.prove
+ ret i1 %res
+}
+
+; X >=(s) Y == X ==> Y (i1 1 becomes -1 for reasoning)
+define i1 @test_sge(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test_sge
+; CHECK: ret i1 true
+ %iplus1 = add nsw nuw i32 %i, 1
+ %var29 = icmp ult i32 %i, %length.i
+ %var30 = icmp ult i32 %iplus1, %length.i
+ %res = icmp sge i1 %var30, %var29
+ ret i1 %res
+}
diff --git a/test/Transforms/InstSimplify/shift-128-kb.ll b/test/Transforms/InstSimplify/shift-128-kb.ll
new file mode 100644
index 000000000000..3f69ecccaf5b
--- /dev/null
+++ b/test/Transforms/InstSimplify/shift-128-kb.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define zeroext i1 @_Z10isNegativemj(i64 %Val, i32 zeroext %IntegerBitWidth) {
+entry:
+ %conv = zext i32 %IntegerBitWidth to i64
+ %sub = sub i64 128, %conv
+ %conv1 = trunc i64 %sub to i32
+ %conv2 = zext i64 %Val to i128
+ %sh_prom = zext i32 %conv1 to i128
+ %shl = shl i128 %conv2, %sh_prom
+ %shr = ashr i128 %shl, %sh_prom
+ %cmp = icmp slt i128 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @_Z10isNegativemj
+; CHECK-NOT: ret i1 false
+; CHECK: ret i1 %cmp
+
diff --git a/test/Transforms/InstSimplify/shr-nop.ll b/test/Transforms/InstSimplify/shr-nop.ll
index b0dc8731a112..edabcc314ea6 100644
--- a/test/Transforms/InstSimplify/shr-nop.ll
+++ b/test/Transforms/InstSimplify/shr-nop.ll
@@ -244,7 +244,7 @@ define i1 @ashr_ne_opposite_msb(i8 %a) {
}
; CHECK-LABEL: @exact_ashr_eq_shift_gt
-; CHECK-NEXT : ret i1 false
+; CHECK-NEXT: ret i1 false
define i1 @exact_ashr_eq_shift_gt(i8 %a) {
%shr = ashr exact i8 -2, %a
%cmp = icmp eq i8 %shr, -8
@@ -252,7 +252,7 @@ define i1 @exact_ashr_eq_shift_gt(i8 %a) {
}
; CHECK-LABEL: @exact_ashr_ne_shift_gt
-; CHECK-NEXT : ret i1 true
+; CHECK-NEXT: ret i1 true
define i1 @exact_ashr_ne_shift_gt(i8 %a) {
%shr = ashr exact i8 -2, %a
%cmp = icmp ne i8 %shr, -8
@@ -260,7 +260,7 @@ define i1 @exact_ashr_ne_shift_gt(i8 %a) {
}
; CHECK-LABEL: @nonexact_ashr_eq_shift_gt
-; CHECK-NEXT : ret i1 false
+; CHECK-NEXT: ret i1 false
define i1 @nonexact_ashr_eq_shift_gt(i8 %a) {
%shr = ashr i8 -2, %a
%cmp = icmp eq i8 %shr, -8
@@ -268,7 +268,7 @@ define i1 @nonexact_ashr_eq_shift_gt(i8 %a) {
}
; CHECK-LABEL: @nonexact_ashr_ne_shift_gt
-; CHECK-NEXT : ret i1 true
+; CHECK-NEXT: ret i1 true
define i1 @nonexact_ashr_ne_shift_gt(i8 %a) {
%shr = ashr i8 -2, %a
%cmp = icmp ne i8 %shr, -8
@@ -292,7 +292,7 @@ define i1 @exact_lshr_ne_shift_gt(i8 %a) {
}
; CHECK-LABEL: @nonexact_lshr_eq_shift_gt
-; CHECK-NEXT : ret i1 false
+; CHECK-NEXT: ret i1 false
define i1 @nonexact_lshr_eq_shift_gt(i8 %a) {
%shr = lshr i8 2, %a
%cmp = icmp eq i8 %shr, 8
@@ -300,7 +300,7 @@ define i1 @nonexact_lshr_eq_shift_gt(i8 %a) {
}
; CHECK-LABEL: @nonexact_lshr_ne_shift_gt
-; CHECK-NEXT : ret i1 true
+; CHECK-NEXT: ret i1 true
define i1 @nonexact_lshr_ne_shift_gt(i8 %a) {
%shr = ashr i8 2, %a
%cmp = icmp ne i8 %shr, 8
diff --git a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
index c50b6fc61c8e..58f3c1d09cc2 100644
--- a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
+++ b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
@@ -3,11 +3,11 @@
@A = global i32 0
; CHECK: @A = internal global i32 0
-@B = alias i32* @A
-; CHECK: @B = internal alias i32* @A
+@B = alias i32, i32* @A
+; CHECK: @B = internal alias i32, i32* @A
-@C = alias i32* @A
-; CHECK: @C = internal alias i32* @A
+@C = alias i32, i32* @A
+; CHECK: @C = internal alias i32, i32* @A
define i32 @main() {
%tmp = load i32, i32* @C
diff --git a/test/Transforms/Internalize/comdat.ll b/test/Transforms/Internalize/comdat.ll
new file mode 100644
index 000000000000..ac536f7eb656
--- /dev/null
+++ b/test/Transforms/Internalize/comdat.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -internalize -internalize-public-api-list c1 -internalize-public-api-list c2 -internalize-public-api-list c3 -internalize-public-api-list c4 -S | FileCheck %s
+
+$c1 = comdat any
+$c2 = comdat any
+$c3 = comdat any
+$c4 = comdat any
+
+; CHECK: @c1_c = global i32 0, comdat($c1)
+@c1_c = global i32 0, comdat($c1)
+
+; CHECK: @c2_b = internal global i32 0{{$}}
+@c2_b = global i32 0, comdat($c2)
+
+; CHECK: @c3 = global i32 0, comdat{{$}}
+@c3 = global i32 0, comdat
+
+; CHECK: @c4_a = internal global i32 0, comdat($c4)
+@c4_a = internal global i32 0, comdat($c4)
+
+; CHECK: @c1_d = alias i32, i32* @c1_c
+@c1_d = alias i32, i32* @c1_c
+
+; CHECK: @c2_c = internal alias i32, i32* @c2_b
+@c2_c = alias i32, i32* @c2_b
+
+; CHECK: @c4 = alias i32, i32* @c4_a
+@c4 = alias i32, i32* @c4_a
+
+; CHECK: define void @c1() comdat {
+define void @c1() comdat {
+ ret void
+}
+
+; CHECK: define void @c1_a() comdat($c1) {
+define void @c1_a() comdat($c1) {
+ ret void
+}
+
+; CHECK: define internal void @c2() {
+define internal void @c2() comdat {
+ ret void
+}
+
+; CHECK: define internal void @c2_a() {
+define void @c2_a() comdat($c2) {
+ ret void
+}
+
+; CHECK: define void @c3_a() comdat($c3) {
+define void @c3_a() comdat($c3) {
+ ret void
+}
diff --git a/test/Transforms/Internalize/local-visibility.ll b/test/Transforms/Internalize/local-visibility.ll
index b09a136e5263..0d73f21972aa 100644
--- a/test/Transforms/Internalize/local-visibility.ll
+++ b/test/Transforms/Internalize/local-visibility.ll
@@ -10,10 +10,10 @@
; CHECK: @protected.variable = internal global i32 0
@protected.variable = protected global i32 0
-; CHECK: @hidden.alias = internal alias i32* @global
-@hidden.alias = hidden alias i32* @global
-; CHECK: @protected.alias = internal alias i32* @global
-@protected.alias = protected alias i32* @global
+; CHECK: @hidden.alias = internal alias i32, i32* @global
+@hidden.alias = hidden alias i32, i32* @global
+; CHECK: @protected.alias = internal alias i32, i32* @global
+@protected.alias = protected alias i32, i32* @global
; CHECK: define internal void @hidden.function() {
define hidden void @hidden.function() {
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index 32cc4de9285a..46c92bc1f577 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -483,7 +483,7 @@ declare void @g()
declare void @j()
declare void @k()
-; CHECK: define void @h(i32 %p) {
+; CHECK-LABEL: define void @h(i32 %p) {
define void @h(i32 %p) {
%x = icmp ult i32 %p, 5
br i1 %x, label %l1, label %l2
@@ -513,4 +513,36 @@ l5:
; CHECK: }
}
+; CHECK-LABEL: define void @h_con(i32 %p) {
+define void @h_con(i32 %p) {
+ %x = icmp ult i32 %p, 5
+ br i1 %x, label %l1, label %l2
+
+l1:
+ call void @j()
+ br label %l3
+
+l2:
+ call void @k()
+ br label %l3
+
+l3:
+; CHECK: call void @g() [[CON:#[0-9]+]]
+; CHECK-NOT: call void @g() [[CON]]
+ call void @g() convergent
+ %y = icmp ult i32 %p, 5
+ br i1 %y, label %l4, label %l5
+
+l4:
+ call void @j()
+ ret void
+
+l5:
+ call void @k()
+ ret void
+; CHECK: }
+}
+
+
; CHECK: attributes [[NOD]] = { noduplicate }
+; CHECK: attributes [[CON]] = { convergent }
diff --git a/test/Transforms/JumpThreading/implied-cond.ll b/test/Transforms/JumpThreading/implied-cond.ll
new file mode 100644
index 000000000000..3d1717e91261
--- /dev/null
+++ b/test/Transforms/JumpThreading/implied-cond.ll
@@ -0,0 +1,98 @@
+; RUN: opt -jump-threading -S < %s | FileCheck %s
+
+declare void @side_effect(i32)
+
+define void @test0(i32 %i, i32 %len) {
+; CHECK-LABEL: @test0(
+ entry:
+ call void @side_effect(i32 0)
+ %i.inc = add nuw i32 %i, 1
+ %c0 = icmp ult i32 %i.inc, %len
+ br i1 %c0, label %left, label %right
+
+ left:
+; CHECK: entry:
+; CHECK: br i1 %c0, label %left0, label %right
+
+; CHECK: left0:
+; CHECK: call void @side_effect
+; CHECK-NOT: br i1 %c1
+; CHECK: call void @side_effect
+ call void @side_effect(i32 0)
+ %c1 = icmp ult i32 %i, %len
+ br i1 %c1, label %left0, label %right
+
+ left0:
+ call void @side_effect(i32 0)
+ ret void
+
+ right:
+ %t = phi i32 [ 1, %left ], [ 2, %entry ]
+ call void @side_effect(i32 %t)
+ ret void
+}
+
+define void @test1(i32 %i, i32 %len) {
+; CHECK-LABEL: @test1(
+ entry:
+ call void @side_effect(i32 0)
+ %i.inc = add nsw i32 %i, 1
+ %c0 = icmp slt i32 %i.inc, %len
+ br i1 %c0, label %left, label %right
+
+ left:
+; CHECK: entry:
+; CHECK: br i1 %c0, label %left0, label %right
+
+; CHECK: left0:
+; CHECK: call void @side_effect
+; CHECK-NOT: br i1 %c1
+; CHECK: call void @side_effect
+ call void @side_effect(i32 0)
+ %c1 = icmp slt i32 %i, %len
+ br i1 %c1, label %left0, label %right
+
+ left0:
+ call void @side_effect(i32 0)
+ ret void
+
+ right:
+ %t = phi i32 [ 1, %left ], [ 2, %entry ]
+ call void @side_effect(i32 %t)
+ ret void
+}
+
+define void @test2(i32 %i, i32 %len, i1* %c.ptr) {
+; CHECK-LABEL: @test2(
+
+; CHECK: entry:
+; CHECK: br i1 %c0, label %cont, label %right
+; CHECK: cont:
+; CHECK: br i1 %c, label %left0, label %right
+; CHECK: left0:
+; CHECK: call void @side_effect(i32 0)
+; CHECK: call void @side_effect(i32 0)
+ entry:
+ call void @side_effect(i32 0)
+ %i.inc = add nsw i32 %i, 1
+ %c0 = icmp slt i32 %i.inc, %len
+ br i1 %c0, label %cont, label %right
+
+ cont:
+ %c = load i1, i1* %c.ptr
+ br i1 %c, label %left, label %right
+
+ left:
+ call void @side_effect(i32 0)
+ %c1 = icmp slt i32 %i, %len
+ br i1 %c1, label %left0, label %right
+
+ left0:
+ call void @side_effect(i32 0)
+ ret void
+
+ right:
+ %t = phi i32 [ 1, %left ], [ 2, %entry ], [ 3, %cont ]
+ call void @side_effect(i32 %t)
+ ret void
+}
diff --git a/test/Transforms/JumpThreading/phi-known.ll b/test/Transforms/JumpThreading/phi-known.ll
new file mode 100644
index 000000000000..8eaf57f748ac
--- /dev/null
+++ b/test/Transforms/JumpThreading/phi-known.ll
@@ -0,0 +1,66 @@
+; RUN: opt -S -jump-threading %s | FileCheck %s
+
+; Value of predicate known on all inputs (trivial case)
+; Note: InstCombine/EarlyCSE would also get this case
+define void @test(i8* %p, i8** %addr) {
+; CHECK-LABEL: @test
+entry:
+ %cmp0 = icmp eq i8* %p, null
+ br i1 %cmp0, label %exit, label %loop
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NEXT: phi
+; CHECK-NEXT: br label %loop
+ %p1 = phi i8* [%p, %entry], [%p1, %loop]
+ %cmp1 = icmp eq i8* %p1, null
+ br i1 %cmp1, label %exit, label %loop
+exit:
+ ret void
+}
+
+; Value of predicate known on all inputs (non-trivial)
+define void @test2(i8* %p) {
+; CHECK-LABEL: @test2
+entry:
+ %cmp0 = icmp eq i8* %p, null
+ br i1 %cmp0, label %exit, label %loop
+loop:
+ %p1 = phi i8* [%p, %entry], [%p2, %backedge]
+ %cmp1 = icmp eq i8* %p1, null
+ br i1 %cmp1, label %exit, label %backedge
+backedge:
+; CHECK-LABEL: backedge:
+; CHECK-NEXT: phi
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: load
+; CHECK-NEXT: cmp
+; CHECK-NEXT: br
+; CHECK-DAG: label %backedge
+ %addr = bitcast i8* %p1 to i8**
+ %p2 = load i8*, i8** %addr
+ %cmp2 = icmp eq i8* %p2, null
+ br i1 %cmp2, label %exit, label %loop
+exit:
+ ret void
+}
+
+; If the inputs don't branch the same way, we can't rewrite.
+; Well, we could unroll this loop exactly twice, but that's
+; a different transform.
+define void @test_mixed(i8* %p) {
+; CHECK-LABEL: @test_mixed
+entry:
+ %cmp0 = icmp eq i8* %p, null
+ br i1 %cmp0, label %exit, label %loop
+loop:
+; CHECK-LABEL: loop:
+; CHECK-NEXT: phi
+; CHECK-NEXT: %cmp1 = icmp
+; CHECK-NEXT: br i1 %cmp1
+ %p1 = phi i8* [%p, %entry], [%p1, %loop]
+ %cmp1 = icmp ne i8* %p1, null
+ br i1 %cmp1, label %exit, label %loop
+exit:
+ ret void
+}
+
diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll
index d0df7725f722..595cacbcbf54 100644
--- a/test/Transforms/JumpThreading/select.ll
+++ b/test/Transforms/JumpThreading/select.ll
@@ -91,6 +91,36 @@ L3:
}
+; Jump threading of indirectbr with select as address. Test increased
+; duplication threshold for cases where indirectbr is being threaded
+; through.
+
+; CHECK-LABEL: @test_indirectbr_thresh(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %cond, label %L1, label %L3
+; CHECK-NOT: indirectbr
+define void @test_indirectbr_thresh(i1 %cond, i8* %address) nounwind {
+entry:
+ br i1 %cond, label %L0, label %L3
+L0:
+ %indirect.goto.dest = select i1 %cond, i8* blockaddress(@test_indirectbr_thresh, %L1), i8* %address
+ call void @quux()
+ call void @quux()
+ call void @quux()
+ indirectbr i8* %indirect.goto.dest, [label %L1, label %L2, label %L3]
+
+L1:
+ call void @foo()
+ ret void
+L2:
+ call void @bar()
+ ret void
+L3:
+ call void @baz()
+ ret void
+}
+
+
; A more complicated case: the condition is a select based on a comparison.
; CHECK-LABEL: @test_switch_cmp(
diff --git a/test/Transforms/JumpThreading/update-edge-weight.ll b/test/Transforms/JumpThreading/update-edge-weight.ll
new file mode 100644
index 000000000000..58cd71861d8a
--- /dev/null
+++ b/test/Transforms/JumpThreading/update-edge-weight.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -jump-threading %s | FileCheck %s
+
+; Test if edge weights are properly updated after jump threading.
+
+; CHECK: !2 = !{!"branch_weights", i32 1629125526, i32 518358122}
+
+define void @foo(i32 %n) !prof !0 {
+entry:
+ %cmp = icmp sgt i32 %n, 10
+ br i1 %cmp, label %if.then.1, label %if.else.1, !prof !1
+
+if.then.1:
+ tail call void @a()
+ br label %if.cond
+
+if.else.1:
+ tail call void @b()
+ br label %if.cond
+
+if.cond:
+ %cmp1 = icmp sgt i32 %n, 5
+ br i1 %cmp1, label %if.then.2, label %if.else.2, !prof !2
+
+if.then.2:
+ tail call void @c()
+ br label %if.end
+
+if.else.2:
+ tail call void @d()
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+declare void @a()
+declare void @b()
+declare void @c()
+declare void @d()
+
+!0 = !{!"function_entry_count", i64 1}
+!1 = !{!"branch_weights", i32 10, i32 5}
+!2 = !{!"branch_weights", i32 10, i32 1}
diff --git a/test/Transforms/LCSSA/mixed-catch.ll b/test/Transforms/LCSSA/mixed-catch.ll
new file mode 100644
index 000000000000..95d5b17bf081
--- /dev/null
+++ b/test/Transforms/LCSSA/mixed-catch.ll
@@ -0,0 +1,95 @@
+; RUN: opt -lcssa -S < %s | FileCheck %s
+
+; This test is based on the following C++ code:
+;
+; void f()
+; {
+; for (int i=0; i<12; i++) {
+; try {
+; if (i==3)
+; throw i;
+; } catch (int) {
+; continue;
+; } catch (...) { }
+; if (i==3) break;
+; }
+; }
+;
+; The loop info analysis identifies the catch pad for the second catch as being
+; outside the loop (because it returns to %for.end) but the associated
+; catchswitch block is identified as being inside the loop. Because of this
+; analysis, the LCSSA pass wants to create a PHI node in the catchpad block
+; for the catchswitch value, but this is a token, so it can't.
+
+define void @f() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %tmp = alloca i32, align 4
+ %i7 = alloca i32, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i.0, 12
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %cond = icmp eq i32 %i.0, 3
+ br i1 %cond, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ store i32 %i.0, i32* %tmp, align 4
+ %tmp1 = bitcast i32* %tmp to i8*
+ invoke void @_CxxThrowException(i8* %tmp1, %eh.ThrowInfo* nonnull @_TI1H) #1
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %if.then
+ %tmp2 = catchswitch within none [label %catch, label %catch2] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %tmp3 = catchpad within %tmp2 [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i32* %i7]
+ catchret from %tmp3 to label %for.inc
+
+catch2: ; preds = %catch.dispatch
+ %tmp4 = catchpad within %tmp2 [i8* null, i32 64, i8* null]
+ catchret from %tmp4 to label %for.end
+
+for.inc: ; preds = %catch, %for.body
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %catch2, %for.cond
+ ret void
+
+unreachable: ; preds = %if.then
+ unreachable
+}
+
+; CHECK-LABEL: define void @f()
+; CHECK: catch2:
+; CHECK-NOT: phi
+; CHECK: %tmp4 = catchpad within %tmp2
+; CHECK: catchret from %tmp4 to label %for.end
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
+%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+
+$"\01??_R0H@8" = comdat any
+
+$"_CT??_R0H@84" = comdat any
+
+$_CTA1H = comdat any
+
+$_TI1H = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@__ImageBase = external constant i8
+@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
+@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
+@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll b/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll
index 73862db69819..e2b07facd48e 100644
--- a/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll
+++ b/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -globalsmodref-aa -licm -disable-output
+; RUN: opt < %s -globals-aa -licm -disable-output
@PL_regcomp_parse = internal global i8* null ; <i8**> [#uses=2]
diff --git a/test/Transforms/LICM/argmemonly-call.ll b/test/Transforms/LICM/argmemonly-call.ll
new file mode 100644
index 000000000000..e2640a1c8deb
--- /dev/null
+++ b/test/Transforms/LICM/argmemonly-call.ll
@@ -0,0 +1,69 @@
+; RUN: opt -S -basicaa -licm %s | FileCheck %s
+declare i32 @foo() readonly argmemonly nounwind
+declare i32 @foo2() readonly nounwind
+declare i32 @bar(i32* %loc2) readonly argmemonly nounwind
+
+define void @test(i32* %loc) {
+; CHECK-LABEL: @test
+; CHECK: @foo
+; CHECK-LABEL: loop:
+ br label %loop
+
+loop:
+ %res = call i32 @foo()
+ store i32 %res, i32* %loc
+ br label %loop
+}
+
+; Negative test: show argmemonly is required
+define void @test_neg(i32* %loc) {
+; CHECK-LABEL: @test_neg
+; CHECK-LABEL: loop:
+; CHECK: @foo
+ br label %loop
+
+loop:
+ %res = call i32 @foo2()
+ store i32 %res, i32* %loc
+ br label %loop
+}
+
+define void @test2(i32* noalias %loc, i32* noalias %loc2) {
+; CHECK-LABEL: @test2
+; CHECK: @bar
+; CHECK-LABEL: loop:
+ br label %loop
+
+loop:
+ %res = call i32 @bar(i32* %loc2)
+ store i32 %res, i32* %loc
+ br label %loop
+}
+
+; Negative test: the store to %gep might clobber memory read through %loc
+define void @test3(i32* %loc) {
+; CHECK-LABEL: @test3
+; CHECK-LABEL: loop:
+; CHECK: @bar
+ br label %loop
+
+loop:
+ %res = call i32 @bar(i32* %loc)
+ %gep = getelementptr i32, i32 *%loc, i64 1000000
+ store i32 %res, i32* %gep
+ br label %loop
+}
+
+
+; Negative test: %loc might alias %loc2
+define void @test4(i32* %loc, i32* %loc2) {
+; CHECK-LABEL: @test4
+; CHECK-LABEL: loop:
+; CHECK: @bar
+ br label %loop
+
+loop:
+ %res = call i32 @bar(i32* %loc2)
+ store i32 %res, i32* %loc
+ br label %loop
+}
diff --git a/test/Transforms/LICM/debug-value.ll b/test/Transforms/LICM/debug-value.ll
index 62429fdc3216..d8ae5e576641 100644
--- a/test/Transforms/LICM/debug-value.ll
+++ b/test/Transforms/LICM/debug-value.ll
@@ -36,17 +36,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!26}
!llvm.dbg.sp = !{!0, !6, !9, !10}
-!0 = !DISubprogram(name: "idamax", line: 112, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "idamax", line: 112, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !3)
!1 = !DIFile(filename: "/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127169)", isOptimized: true, emissionKind: 0, file: !25, enums: !8, retainedTypes: !8, subprograms: !8)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127169)", isOptimized: true, emissionKind: 0, file: !25, enums: !8, retainedTypes: !8, subprograms: !8)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DISubprogram(name: "dscal", line: 206, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
+!6 = distinct !DISubprogram(name: "dscal", line: 206, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
!7 = !DISubroutineType(types: !8)
!8 = !{null}
-!9 = !DISubprogram(name: "daxpy", line: 230, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
-!10 = !DISubprogram(name: "dgefa", line: 267, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
+!9 = distinct !DISubprogram(name: "daxpy", line: 230, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
+!10 = distinct !DISubprogram(name: "dgefa", line: 267, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
!11 = !DILocation(line: 281, column: 9, scope: !12)
!12 = distinct !DILexicalBlock(line: 272, column: 5, file: !25, scope: !13)
!13 = distinct !DILexicalBlock(line: 271, column: 5, file: !25, scope: !14)
@@ -55,7 +55,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!16 = !DILocation(line: 284, column: 10, scope: !17)
!17 = distinct !DILexicalBlock(line: 282, column: 9, file: !25, scope: !12)
!18 = !{double undef}
-!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "temp", line: 268, scope: !14, file: !1, type: !20)
+!19 = !DILocalVariable(name: "temp", line: 268, scope: !14, file: !1, type: !20)
!20 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!21 = !DILocation(line: 286, column: 14, scope: !22)
!22 = distinct !DILexicalBlock(line: 285, column: 13, file: !25, scope: !17)
diff --git a/test/Transforms/LICM/hoist-deref-load.ll b/test/Transforms/LICM/hoist-deref-load.ll
index c384a858d1e6..fd10c5d7503d 100644
--- a/test/Transforms/LICM/hoist-deref-load.ll
+++ b/test/Transforms/LICM/hoist-deref-load.ll
@@ -388,5 +388,49 @@ for.end: ; preds = %for.inc, %entry
ret void
}
+; In this test we should be able to only hoist load from %cptr. We can't hoist
+; load from %c because its dereferenceability can depend on %cmp1 condition.
+; By moving it out of the loop we break this dependency and can not rely
+; on the dereferenceability anymore.
+; In other words this test checks that we strip dereferenceability metadata
+; after hoisting an instruction.
+
+; CHECK-LABEL: @test10
+; CHECK: %c = load i32*, i32** %cptr
+; CHECK-NOT: dereferenceable
+; CHECK: if.then:
+; CHECK: load i32, i32* %c, align 4
+
+define void @test10(i32* noalias %a, i32* %b, i32** dereferenceable(8) %cptr, i32 %n) #0 {
+entry:
+ %cmp11 = icmp sgt i32 %n, 0
+ br i1 %cmp11, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %c = load i32*, i32** %cptr, !dereferenceable !0
+ %1 = load i32, i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
attributes #0 = { nounwind uwtable }
!0 = !{i64 4}
diff --git a/test/Transforms/LICM/hoist-invariant-load.ll b/test/Transforms/LICM/hoist-invariant-load.ll
index aec155b5580a..ed669f383fc3 100644
--- a/test/Transforms/LICM/hoist-invariant-load.ll
+++ b/test/Transforms/LICM/hoist-invariant-load.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: opt < %s -licm -stats -S 2>&1 | grep "1 licm"
+; RUN: opt < %s -licm -disable-basicaa -stats -S 2>&1 | grep "1 licm"
@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
diff --git a/test/Transforms/LICM/pr23608.ll b/test/Transforms/LICM/pr23608.ll
index 249bc6bf5f63..fe6fd1a1810b 100644
--- a/test/Transforms/LICM/pr23608.ll
+++ b/test/Transforms/LICM/pr23608.ll
@@ -31,7 +31,7 @@ bb2: ; preds = %while.cond
br i1 %tobool, label %bb13, label %bb15
bb13: ; preds = %bb2
-; CHECK-LABEL bb13:
+; CHECK-LABEL: bb13:
; CHECK: %tmp8.le = inttoptr
%.lcssa7 = phi i32* [ %tmp8, %bb2 ]
call void @__msan_warning_noreturn()
diff --git a/test/Transforms/LoopDistribute/basic-with-memchecks.ll b/test/Transforms/LoopDistribute/basic-with-memchecks.ll
index 3aced4850411..dce5698595ac 100644
--- a/test/Transforms/LoopDistribute/basic-with-memchecks.ll
+++ b/test/Transforms/LoopDistribute/basic-with-memchecks.ll
@@ -36,7 +36,7 @@ entry:
; Since the checks to A and A + 4 get merged, this will give us a
; total of 8 compares.
;
-; CHECK: for.body.lver.memcheck:
+; CHECK: for.body.lver.check:
; CHECK: = icmp
; CHECK: = icmp
diff --git a/test/Transforms/LoopDistribute/bounds-expansion-bug.ll b/test/Transforms/LoopDistribute/bounds-expansion-bug.ll
new file mode 100644
index 000000000000..5d1aac6c104c
--- /dev/null
+++ b/test/Transforms/LoopDistribute/bounds-expansion-bug.ll
@@ -0,0 +1,106 @@
+; RUN: opt -basicaa -loop-distribute -S < %s | FileCheck %s
+
+; When emitting the memchecks for:
+;
+; for (i = 0; i < n; i++) {
+; A[i + 1] = A[i] * B[i];
+; =======================
+; C[i] = D[i] * E[i];
+; }
+;
+; we had a bug when expanding the bounds for A and C. These are expanded
+; multiple times and rely on the caching in SCEV expansion to avoid any
+; redundancy. However, due to logic in SCEVExpander::ReuseOrCreateCast, we
+; can get earlier expanded values invalidated when casts are used. This test
+; ensures that we are not using the invalidated values.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %a1, i32* %a2,
+ i32* %b,
+ i32* %c1, i32* %c2,
+ i32* %d,
+ i32* %e) {
+entry:
+
+ %cond = icmp eq i32* %e, null
+ br i1 %cond, label %one, label %two
+one:
+ br label %join
+two:
+ br label %join
+join:
+
+; The pointers need to be defined by PHIs in order for the bug to trigger.
+; Because of the PHIs the existing casts won't be at the desired location so a
+; new cast will be emitted and the old cast will get invalidated.
+;
+; These are the steps:
+;
+; 1. After the bounds for A and C are first expanded:
+;
+; join:
+; %a = phi i32* [ %a1, %one ], [ %a2, %two ]
+; %c = phi i32* [ %c1, %one ], [ %c2, %two ]
+; %c5 = bitcast i32* %c to i8*
+; %a3 = bitcast i32* %a to i8*
+;
+; 2. After A is expanded again:
+;
+; join: ; preds = %two, %one
+; %a = phi i32* [ %a1, %one ], [ %a2, %two ]
+; %c = phi i32* [ %c1, %one ], [ %c2, %two ]
+; %a3 = bitcast i32* %a to i8* <--- new
+; %c5 = bitcast i32* %c to i8*
+; %0 = bitcast i32* undef to i8* <--- old, invalidated
+;
+; 3. Finally, when C is expanded again:
+;
+; join: ; preds = %two, %one
+; %a = phi i32* [ %a1, %one ], [ %a2, %two ]
+; %c = phi i32* [ %c1, %one ], [ %c2, %two ]
+; %c5 = bitcast i32* %c to i8* <--- new
+; %a3 = bitcast i32* %a to i8*
+; %0 = bitcast i32* undef to i8* <--- old, invalidated
+; %1 = bitcast i32* undef to i8*
+
+ %a = phi i32* [%a1, %one], [%a2, %two]
+ %c = phi i32* [%c1, %one], [%c2, %two]
+ br label %for.body
+
+
+; CHECK: [[VALUE:%[0-9a-z]+]] = bitcast i32* undef to i8*
+; CHECK-NOT: [[VALUE]]
+
+for.body: ; preds = %for.body, %join
+ %ind = phi i64 [ 0, %join ], [ %add, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %mulA = mul i32 %loadB, %loadA
+
+ %add = add nuw nsw i64 %ind, 1
+ %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+ store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+ %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+ %loadD = load i32, i32* %arrayidxD, align 4
+
+ %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
+ %loadE = load i32, i32* %arrayidxE, align 4
+
+ %mulC = mul i32 %loadD, %loadE
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+ store i32 %mulC, i32* %arrayidxC, align 4
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll b/test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll
new file mode 100644
index 000000000000..2ba746dd6b9e
--- /dev/null
+++ b/test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll
@@ -0,0 +1,57 @@
+; RUN: opt -basicaa -loop-distribute -S < %s | FileCheck %s
+
+; If we can't find the bounds for one of the arrays in order to generate the
+; memchecks (e.g., C[i * i] below), the loop should not get distributed.
+;
+; for (i = 0; i < n; i++) {
+; A[i + 1] = A[i] * 3;
+; -------------------------------
+; C[i * i] = B[i] * 2;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Verify that we didn't distribute by checking that we still have the original
+; number of branches.
+
+@A = common global i32* null, align 8
+@B = common global i32* null, align 8
+@C = common global i32* null, align 8
+
+define void @f() {
+entry:
+ %a = load i32*, i32** @A, align 8
+ %b = load i32*, i32** @B, align 8
+ %c = load i32*, i32** @C, align 8
+ br label %for.body
+; CHECK: br
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %mulA = mul i32 %loadA, 3
+
+ %add = add nuw nsw i64 %ind, 1
+ %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+ store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %mulC = mul i32 %loadB, 2
+
+ %ind_2 = mul i64 %ind, %ind
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind_2
+ store i32 %mulC, i32* %arrayidxC, align 4
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+; CHECK: br
+; CHECK-NOT: br
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
index c633ae95d16f..27a955175b59 100644
--- a/test/Transforms/LoopIdiom/basic.ll
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -424,3 +424,110 @@ exit:
ret void
; CHECK: ret void
}
+
+; Recognize loops with a negative stride.
+define void @test15(i32* nocapture %f) {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+ store i32 0, i32* %arrayidx, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %cmp = icmp sgt i64 %indvars.iv, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+; CHECK-LABEL: @test15(
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %f1, i8 0, i64 262148, i32 4, i1 false)
+; CHECK-NOT: store
+; CHECK: ret void
+}
+
+; Loop with a negative stride. Verify an aliasing write to f[65536] prevents
+; the creation of a memset.
+define void @test16(i32* nocapture %f) {
+entry:
+ %arrayidx1 = getelementptr inbounds i32, i32* %f, i64 65536
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 65536, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+ store i32 0, i32* %arrayidx, align 4
+ store i32 1, i32* %arrayidx1, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %cmp = icmp sgt i64 %indvars.iv, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+; CHECK-LABEL: @test16(
+; CHECK-NOT: call void @llvm.memset.p0i8.i64
+; CHECK: ret void
+}
+
+; Handle memcpy-able loops with negative stride.
+define noalias i32* @test17(i32* nocapture readonly %a, i32 %c) {
+entry:
+ %conv = sext i32 %c to i64
+ %mul = shl nsw i64 %conv, 2
+ %call = tail call noalias i8* @malloc(i64 %mul)
+ %0 = bitcast i8* %call to i32*
+ %tobool.9 = icmp eq i32 %c, 0
+ br i1 %tobool.9, label %while.end, label %while.body.preheader
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %dec10.in = phi i32 [ %dec10, %while.body ], [ %c, %while.body.preheader ]
+ %dec10 = add nsw i32 %dec10.in, -1
+ %idxprom = sext i32 %dec10 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %1 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %0, i64 %idxprom
+ store i32 %1, i32* %arrayidx2, align 4
+ %tobool = icmp eq i32 %dec10, 0
+ br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit: ; preds = %while.body
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ ret i32* %0
+; CHECK-LABEL: @test17(
+; CHECK: call void @llvm.memcpy
+; CHECK: ret i32*
+}
+
+declare noalias i8* @malloc(i64)
+
+; Handle memcpy-able loops with negative stride.
+; void test18(unsigned *__restrict__ a, unsigned *__restrict__ b) {
+; for (int i = 2047; i >= 0; --i) {
+; a[i] = b[i];
+; }
+; }
+define void @test18(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 2047, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ store i32 %0, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %cmp = icmp sgt i64 %indvars.iv, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+; CHECK-LABEL: @test18(
+; CHECK: call void @llvm.memcpy
+; CHECK: ret
+}
diff --git a/test/Transforms/LoopIdiom/debug-line.ll b/test/Transforms/LoopIdiom/debug-line.ll
index bcd862d7a729..a85e48997548 100644
--- a/test/Transforms/LoopIdiom/debug-line.ll
+++ b/test/Transforms/LoopIdiom/debug-line.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-darwin10.0.0"
-define void @foo(double* nocapture %a) nounwind ssp {
+define void @foo(double* nocapture %a) nounwind ssp !dbg !0 {
entry:
tail call void @llvm.dbg.value(metadata double* %a, i64 0, metadata !5, metadata !DIExpression()), !dbg !8
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !10, metadata !DIExpression()), !dbg !14
@@ -30,17 +30,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!19}
!llvm.dbg.sp = !{!0}
-!0 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !18, scope: !1, type: !3, function: void (double*)* @foo)
+!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !18, scope: !1, type: !3)
!1 = !DIFile(filename: "li.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127165:127174)", isOptimized: true, emissionKind: 0, file: !18, enums: !9, retainedTypes: !9)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127165:127174)", isOptimized: true, emissionKind: 0, file: !18, enums: !9, retainedTypes: !9)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
-!5 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 2, arg: 1, scope: !0, file: !1, type: !6)
+!5 = !DILocalVariable(name: "a", line: 2, arg: 1, scope: !0, file: !1, type: !6)
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !2, baseType: !7)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!8 = !DILocation(line: 2, column: 18, scope: !0)
!9 = !{}
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3, scope: !11, file: !1, type: !13)
+!10 = !DILocalVariable(name: "i", line: 3, scope: !11, file: !1, type: !13)
!11 = distinct !DILexicalBlock(line: 3, column: 3, file: !18, scope: !12)
!12 = distinct !DILexicalBlock(line: 2, column: 21, file: !18, scope: !0)
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/LoopLoadElim/backward.ll b/test/Transforms/LoopLoadElim/backward.ll
new file mode 100644
index 000000000000..7c750a51a2a3
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/backward.ll
@@ -0,0 +1,32 @@
+; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+
+; Simple st->ld forwarding derived from a lexical backward dep.
+;
+; for (unsigned i = 0; i < 100; i++)
+; A[i+1] = A[i] + B[i];
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N) {
+entry:
+; CHECK: %load_initial = load i32, i32* %A
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK: %store_forwarded = phi i32 [ %load_initial, %entry ], [ %add, %for.body ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %load = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %load_1 = load i32, i32* %arrayidx2, align 4
+; CHECK: %add = add i32 %load_1, %store_forwarded
+ %add = add i32 %load_1, %load
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ store i32 %add, i32* %arrayidx_next, align 4
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/def-store-before-load.ll b/test/Transforms/LoopLoadElim/def-store-before-load.ll
new file mode 100644
index 000000000000..3dc93f6786e7
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/def-store-before-load.ll
@@ -0,0 +1,35 @@
+; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+
+; No loop-carried forwarding: The intervening store to A[i] kills the stored
+; value from the previous iteration.
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i] = 1;
+; A[i+1] = A[i] + B[i];
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK-NOT: %store_forwarded
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ store i32 1, i32* %arrayidx, align 4
+ %a = load i32, i32* %arrayidx, align 4
+ %arrayidxB = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %b = load i32, i32* %arrayidxB, align 4
+; CHECK: %add = add i32 %b, %a
+ %add = add i32 %b, %a
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ store i32 %add, i32* %arrayidx_next, align 4
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/forward.ll b/test/Transforms/LoopLoadElim/forward.ll
new file mode 100644
index 000000000000..c2b1816530c1
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/forward.ll
@@ -0,0 +1,47 @@
+; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+
+; Simple st->ld forwarding derived from a lexical forward dep.
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i+1] = B[i] + 2;
+; C[i] = A[i] * 2;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i32* %B, i32* %C, i64 %N) {
+
+; CHECK: for.body.lver.check:
+; CHECK: %found.conflict{{.*}} =
+; CHECK-NOT: %found.conflict{{.*}} =
+
+entry:
+; for.body.ph:
+; CHECK: %load_initial = load i32, i32* %A
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK: %store_forwarded = phi i32 [ %load_initial, %for.body.ph ], [ %a_p1, %for.body ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %Aidx_next, align 4
+
+ %a = load i32, i32* %Aidx, align 4
+; CHECK: %c = mul i32 %store_forwarded, 2
+ %c = mul i32 %a, 2
+ store i32 %c, i32* %Cidx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/memcheck.ll b/test/Transforms/LoopLoadElim/memcheck.ll
new file mode 100644
index 000000000000..8eadd437a5ac
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/memcheck.ll
@@ -0,0 +1,52 @@
+; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+; RUN: opt -loop-load-elim -S -runtime-check-per-loop-load-elim=2 < %s | FileCheck %s --check-prefix=AGGRESSIVE
+
+; This needs two pairs of memchecks (A * { C, D }) for a single load
+; elimination which is considered too expensive by default.
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i+1] = B[i] + 2;
+; C[i] = A[i] * 2;
+; D[i] = 2;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i32* %B, i32* %C, i64 %N, i32* %D) {
+entry:
+ br label %for.body
+
+; AGGRESSIVE: for.body.lver.check:
+; AGGRESSIVE: %found.conflict{{.*}} =
+; AGGRESSIVE: %found.conflict{{.*}} =
+; AGGRESSIVE-NOT: %found.conflict{{.*}} =
+
+for.body: ; preds = %for.body, %entry
+; CHECK-NOT: %store_forwarded =
+; AGGRESSIVE: %store_forwarded =
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %Didx = getelementptr inbounds i32, i32* %D, i64 %indvars.iv
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %Aidx_next, align 4
+
+ %a = load i32, i32* %Aidx, align 4
+; CHECK: %c = mul i32 %a, 2
+; AGGRESSIVE: %c = mul i32 %store_forwarded, 2
+ %c = mul i32 %a, 2
+ store i32 %c, i32* %Cidx, align 4
+ store i32 2, i32* %Didx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/multiple-stores-same-block.ll b/test/Transforms/LoopLoadElim/multiple-stores-same-block.ll
new file mode 100644
index 000000000000..b0c0f3dee86e
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/multiple-stores-same-block.ll
@@ -0,0 +1,48 @@
+; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s
+
+; In this case the later store forwards to the load:
+;
+; for (unsigned i = 0; i < 100; i++) {
+; B[i] = A[i] + 1;
+; A[i+1] = C[i] + 2;
+; A[i+1] = D[i] + 3;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B,
+ i32* noalias nocapture %C, i32* noalias nocapture readonly %D,
+ i64 %N) {
+entry:
+; CHECK: %load_initial = load i32, i32* %A
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK: %store_forwarded = phi i32 [ %load_initial, %entry ], [ %addD, %for.body ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidxA = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %loadA = load i32, i32* %arrayidxA, align 4
+; CHECK: %addA = add i32 %store_forwarded, 1
+ %addA = add i32 %loadA, 1
+
+ %arrayidxB = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ store i32 %addA, i32* %arrayidxB, align 4
+
+ %arrayidxC = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %loadC = load i32, i32* %arrayidxC, align 4
+ %addC = add i32 %loadC, 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidxA_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ store i32 %addC, i32* %arrayidxA_next, align 4
+
+ %arrayidxD = getelementptr inbounds i32, i32* %D, i64 %indvars.iv
+ %loadD = load i32, i32* %arrayidxD, align 4
+ %addD = add i32 %loadD, 3
+ store i32 %addD, i32* %arrayidxA_next, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/unknown-dep.ll b/test/Transforms/LoopLoadElim/unknown-dep.ll
new file mode 100644
index 000000000000..d2df718ca4c3
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/unknown-dep.ll
@@ -0,0 +1,54 @@
+; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Give up in the presence of unknown deps. Here, the different strides result
+; in unknown dependence:
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i+1] = B[i] + 2;
+; A[2*i] = C[i] + 2;
+; D[i] = A[i] + 2;
+; }
+
+define void @f(i32* noalias %A, i32* noalias %B, i32* noalias %C,
+ i32* noalias %D, i64 %N) {
+
+entry:
+; for.body.ph:
+; CHECK-NOT: %load_initial =
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; CHECK-NOT: %store_forwarded =
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Didx = getelementptr inbounds i32, i32* %D, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %indvars.m2 = mul nuw nsw i64 %indvars.iv, 2
+ %A2idx = getelementptr inbounds i32, i32* %A, i64 %indvars.m2
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %Aidx_next, align 4
+
+ %c = load i32, i32* %Cidx, align 4
+ %a_m2 = add i32 %c, 2
+ store i32 %a_m2, i32* %A2idx, align 4
+
+ %a = load i32, i32* %Aidx, align 4
+; CHECK-NOT: %d = add i32 %store_forwarded, 2
+; CHECK: %d = add i32 %a, 2
+ %d = add i32 %a, 2
+ store i32 %d, i32* %Didx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopReroll/negative.ll b/test/Transforms/LoopReroll/negative.ll
new file mode 100644
index 000000000000..36f6806e1c37
--- /dev/null
+++ b/test/Transforms/LoopReroll/negative.ll
@@ -0,0 +1,48 @@
+; RUN: opt -S -loop-reroll %s | FileCheck %s
+target triple = "aarch64--linux-gnu"
+@buf = global [16 x i8] c"\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A\0A", align 1
+
+define i32 @test1(i32 %len, i8* nocapture readonly %buf) #0 {
+entry:
+ %cmp.13 = icmp sgt i32 %len, 1
+ br i1 %cmp.13, label %while.body.lr.ph, label %while.end
+
+while.body.lr.ph: ; preds = %entry
+ br label %while.body
+
+while.body:
+;CHECK-LABEL: while.body:
+;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ]
+;CHECK-NEXT: %sum4.015 = phi i64 [ 0, %while.body.lr.ph ], [ %add, %while.body ]
+;CHECK-NOT: %sub5 = add nsw i32 %len.addr.014, -1
+;CHECK-NOT: %sub5 = add nsw i32 %len.addr.014, -2
+;CHECK: br i1 %exitcond, label %while.cond.while.end_crit_edge, label %while.body
+
+ %sum4.015 = phi i64 [ 0, %while.body.lr.ph ], [ %add4, %while.body ]
+ %len.addr.014 = phi i32 [ %len, %while.body.lr.ph ], [ %sub5, %while.body ]
+ %idxprom = sext i32 %len.addr.014 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %buf, i64 %idxprom
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i64
+ %add = add i64 %conv, %sum4.015
+ %sub = add nsw i32 %len.addr.014, -1
+ %idxprom1 = sext i32 %sub to i64
+ %arrayidx2 = getelementptr inbounds i8, i8* %buf, i64 %idxprom1
+ %1 = load i8, i8* %arrayidx2, align 1
+ %conv3 = zext i8 %1 to i64
+ %add4 = add i64 %add, %conv3
+ %sub5 = add nsw i32 %len.addr.014, -2
+ %cmp = icmp sgt i32 %sub5, 1
+ br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
+
+while.cond.while.end_crit_edge: ; preds = %while.body
+ %add4.lcssa = phi i64 [ %add4, %while.body ]
+ %phitmp = trunc i64 %add4.lcssa to i32
+ br label %while.end
+
+while.end: ; preds = %while.cond.while.end_crit_edge, %entry
+ %sum4.0.lcssa = phi i32 [ %phitmp, %while.cond.while.end_crit_edge ], [ 0, %entry ]
+ ret i32 %sum4.0.lcssa
+ unreachable
+}
+
diff --git a/test/Transforms/LoopReroll/reroll_with_dbg.ll b/test/Transforms/LoopReroll/reroll_with_dbg.ll
new file mode 100644
index 000000000000..78b457ed94ab
--- /dev/null
+++ b/test/Transforms/LoopReroll/reroll_with_dbg.ll
@@ -0,0 +1,139 @@
+;RUN: opt < %s -loop-reroll -S | FileCheck %s
+;void foo(float * restrict a, float * restrict b, int n) {
+; for(int i = 0; i < n; i+=4) {
+; a[i] = b[i];
+; a[i+1] = b[i+1];
+; a[i+2] = b[i+2];
+; a[i+3] = b[i+3];
+; }
+;}
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv4t--linux-gnueabi"
+
+; Function Attrs: nounwind
+define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %b, i32 %n) #0 !dbg !4 {
+entry:
+;CHECK-LABEL: @foo
+
+ tail call void @llvm.dbg.value(metadata float* %a, i64 0, metadata !12, metadata !22), !dbg !23
+ tail call void @llvm.dbg.value(metadata float* %b, i64 0, metadata !13, metadata !22), !dbg !24
+ tail call void @llvm.dbg.value(metadata i32 %n, i64 0, metadata !14, metadata !22), !dbg !25
+ tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !22), !dbg !26
+ %cmp.30 = icmp sgt i32 %n, 0, !dbg !27
+ br i1 %cmp.30, label %for.body.preheader, label %for.cond.cleanup, !dbg !29
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !30
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ br label %for.cond.cleanup, !dbg !32
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void, !dbg !32
+
+for.body: ; preds = %for.body.preheader, %for.body
+;CHECK: for.body:
+;CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, {{.*}} ]
+;CHECK: load
+;CHECK: store
+;CHECK-NOT: load
+;CHECK-NOT: store
+;CHECK: call void @llvm.dbg.value
+;CHECK: %indvar.next = add i32 %indvar, 1
+;CHECK: icmp eq i32 %indvar
+ %i.031 = phi i32 [ %add13, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float, float* %b, i32 %i.031, !dbg !30
+ %0 = bitcast float* %arrayidx to i32*, !dbg !30
+ %1 = load i32, i32* %0, align 4, !dbg !30, !tbaa !33
+ %arrayidx1 = getelementptr inbounds float, float* %a, i32 %i.031, !dbg !37
+ %2 = bitcast float* %arrayidx1 to i32*, !dbg !38
+ store i32 %1, i32* %2, align 4, !dbg !38, !tbaa !33
+ %add = or i32 %i.031, 1, !dbg !39
+ %arrayidx2 = getelementptr inbounds float, float* %b, i32 %add, !dbg !40
+ %3 = bitcast float* %arrayidx2 to i32*, !dbg !40
+ %4 = load i32, i32* %3, align 4, !dbg !40, !tbaa !33
+ %arrayidx4 = getelementptr inbounds float, float* %a, i32 %add, !dbg !41
+ %5 = bitcast float* %arrayidx4 to i32*, !dbg !42
+ store i32 %4, i32* %5, align 4, !dbg !42, !tbaa !33
+ %add5 = or i32 %i.031, 2, !dbg !43
+ %arrayidx6 = getelementptr inbounds float, float* %b, i32 %add5, !dbg !44
+ %6 = bitcast float* %arrayidx6 to i32*, !dbg !44
+ %7 = load i32, i32* %6, align 4, !dbg !44, !tbaa !33
+ %arrayidx8 = getelementptr inbounds float, float* %a, i32 %add5, !dbg !45
+ %8 = bitcast float* %arrayidx8 to i32*, !dbg !46
+ store i32 %7, i32* %8, align 4, !dbg !46, !tbaa !33
+ %add9 = or i32 %i.031, 3, !dbg !47
+ %arrayidx10 = getelementptr inbounds float, float* %b, i32 %add9, !dbg !48
+ %9 = bitcast float* %arrayidx10 to i32*, !dbg !48
+ %10 = load i32, i32* %9, align 4, !dbg !48, !tbaa !33
+ %arrayidx12 = getelementptr inbounds float, float* %a, i32 %add9, !dbg !49
+ %11 = bitcast float* %arrayidx12 to i32*, !dbg !50
+ store i32 %10, i32* %11, align 4, !dbg !50, !tbaa !33
+ %add13 = add nuw nsw i32 %i.031, 4, !dbg !51
+ tail call void @llvm.dbg.value(metadata i32 %add13, i64 0, metadata !15, metadata !22), !dbg !26
+ %cmp = icmp slt i32 %add13, %n, !dbg !27
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !dbg !29
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+strict-align" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !18, !19, !20}
+!llvm.ident = !{!21}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (http://llvm.org/git/clang.git b1fbc23058e7fa1cdd954ab97ba84f1c549c9879) (http://llvm.org/git/llvm.git 054da58c5398a721d4dab7af63d7de8d7a1e1a1c)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "test.c", directory: "/home/weimingz/llvm-build/release/community-tip")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !11)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7, !7, !10}
+!7 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !8)
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 32, align: 32)
+!9 = !DIBasicType(name: "float", size: 32, align: 32, encoding: DW_ATE_float)
+!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!11 = !{!12, !13, !14, !15}
+!12 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 1, type: !7)
+!13 = !DILocalVariable(name: "b", arg: 2, scope: !4, file: !1, line: 1, type: !7)
+!14 = !DILocalVariable(name: "n", arg: 3, scope: !4, file: !1, line: 1, type: !10)
+!15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 2, type: !10)
+!16 = distinct !DILexicalBlock(scope: !4, file: !1, line: 2, column: 3)
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 2, !"Debug Info Version", i32 3}
+!19 = !{i32 1, !"wchar_size", i32 4}
+!20 = !{i32 1, !"min_enum_size", i32 4}
+!21 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git b1fbc23058e7fa1cdd954ab97ba84f1c549c9879) (http://llvm.org/git/llvm.git 054da58c5398a721d4dab7af63d7de8d7a1e1a1c)"}
+!22 = !DIExpression()
+!23 = !DILocation(line: 1, column: 27, scope: !4)
+!24 = !DILocation(line: 1, column: 47, scope: !4)
+!25 = !DILocation(line: 1, column: 54, scope: !4)
+!26 = !DILocation(line: 2, column: 11, scope: !16)
+!27 = !DILocation(line: 2, column: 20, scope: !28)
+!28 = distinct !DILexicalBlock(scope: !16, file: !1, line: 2, column: 3)
+!29 = !DILocation(line: 2, column: 3, scope: !16)
+!30 = !DILocation(line: 3, column: 12, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !28, file: !1, line: 2, column: 31)
+!32 = !DILocation(line: 8, column: 1, scope: !4)
+!33 = !{!34, !34, i64 0}
+!34 = !{!"float", !35, i64 0}
+!35 = !{!"omnipotent char", !36, i64 0}
+!36 = !{!"Simple C/C++ TBAA"}
+!37 = !DILocation(line: 3, column: 5, scope: !31)
+!38 = !DILocation(line: 3, column: 10, scope: !31)
+!39 = !DILocation(line: 4, column: 17, scope: !31)
+!40 = !DILocation(line: 4, column: 14, scope: !31)
+!41 = !DILocation(line: 4, column: 5, scope: !31)
+!42 = !DILocation(line: 4, column: 12, scope: !31)
+!43 = !DILocation(line: 5, column: 17, scope: !31)
+!44 = !DILocation(line: 5, column: 14, scope: !31)
+!45 = !DILocation(line: 5, column: 5, scope: !31)
+!46 = !DILocation(line: 5, column: 12, scope: !31)
+!47 = !DILocation(line: 6, column: 17, scope: !31)
+!48 = !DILocation(line: 6, column: 14, scope: !31)
+!49 = !DILocation(line: 6, column: 5, scope: !31)
+!50 = !DILocation(line: 6, column: 12, scope: !31)
+!51 = !DILocation(line: 2, column: 26, scope: !28)
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index 9bcca15ab551..d90841d16270 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -3,7 +3,7 @@
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp {
+define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp !dbg !0 {
; CHECK-LABEL: define i32 @tak(
; CHECK: entry
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %x
@@ -72,7 +72,7 @@ for.body:
for.inc:
%dec = add i64 %i.0, -1
- tail call void @llvm.dbg.value(metadata i64 %dec, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !0), metadata !DIExpression()), !dbg !DILocation(scope: !0)
+ tail call void @llvm.dbg.value(metadata i64 %dec, i64 0, metadata !DILocalVariable(scope: !0), metadata !DIExpression()), !dbg !DILocation(scope: !0)
br label %for.cond
for.end:
@@ -84,17 +84,17 @@ for.end:
!llvm.module.flags = !{!20}
!llvm.dbg.sp = !{!0}
-!0 = !DISubprogram(name: "tak", line: 32, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !18, scope: !1, type: !3, function: i32 (i32, i32, i32)* @tak)
+!0 = distinct !DISubprogram(name: "tak", line: 32, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !18, scope: !1, type: !3)
!1 = !DIFile(filename: "/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", directory: "/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 125492)", isOptimized: true, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 125492)", isOptimized: true, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 32, arg: 0, scope: !0, file: !1, type: !5)
+!6 = !DILocalVariable(name: "x", line: 32, arg: 1, scope: !0, file: !1, type: !5)
!7 = !DILocation(line: 32, column: 13, scope: !0)
-!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 32, arg: 0, scope: !0, file: !1, type: !5)
+!8 = !DILocalVariable(name: "y", line: 32, arg: 2, scope: !0, file: !1, type: !5)
!9 = !DILocation(line: 32, column: 20, scope: !0)
-!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "z", line: 32, arg: 0, scope: !0, file: !1, type: !5)
+!10 = !DILocalVariable(name: "z", line: 32, arg: 3, scope: !0, file: !1, type: !5)
!11 = !DILocation(line: 32, column: 27, scope: !0)
!12 = !DILocation(line: 33, column: 3, scope: !13)
!13 = distinct !DILexicalBlock(line: 32, column: 30, file: !18, scope: !0)
diff --git a/test/Transforms/LoopSimplify/dbg-loc.ll b/test/Transforms/LoopSimplify/dbg-loc.ll
index 073319bdac3c..b0e14bbcfd7f 100644
--- a/test/Transforms/LoopSimplify/dbg-loc.ll
+++ b/test/Transforms/LoopSimplify/dbg-loc.ll
@@ -16,7 +16,7 @@ declare void @f3()
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label %for.end, !dbg [[LOOPEXIT_LOC:![0-9]+]]
-define linkonce_odr hidden void @foo(%"Length"* %begin, %"Length"* %end) nounwind ssp uwtable align 2 {
+define linkonce_odr hidden void @foo(%"Length"* %begin, %"Length"* %end) nounwind ssp uwtable align 2 !dbg !6 {
entry:
%cmp.4 = icmp eq %"Length"* %begin, %end, !dbg !7
br i1 %cmp.4, label %for.end, label %for.body, !dbg !8
@@ -80,7 +80,7 @@ eh.resume: ; preds = %catch
!3 = !{}
!4 = !DISubroutineType(types: !3)
!5 = !DIFile(filename: "Vector.h", directory: "/tmp")
-!6 = !DISubprogram(name: "destruct", scope: !5, file: !5, line: 71, type: !4, isLocal: false, isDefinition: true, scopeLine: 72, flags: DIFlagPrototyped, isOptimized: false, function: void (%"Length"*, %"Length"*)* @foo, variables: !3)
+!6 = distinct !DISubprogram(name: "destruct", scope: !5, file: !5, line: 71, type: !4, isLocal: false, isDefinition: true, scopeLine: 72, flags: DIFlagPrototyped, isOptimized: false, variables: !3)
!7 = !DILocation(line: 73, column: 38, scope: !6)
!8 = !DILocation(line: 73, column: 13, scope: !6)
!9 = !DILocation(line: 73, column: 27, scope: !6)
diff --git a/test/Transforms/LoopSimplify/single-backedge.ll b/test/Transforms/LoopSimplify/single-backedge.ll
index 92fbdca8a6d2..6f3db8fb14fc 100644
--- a/test/Transforms/LoopSimplify/single-backedge.ll
+++ b/test/Transforms/LoopSimplify/single-backedge.ll
@@ -30,7 +30,7 @@ BE2: ; preds = %n br label %Loop
!2 = !{}
!3 = !DISubroutineType(types: !2)
!4 = !DIFile(filename: "atomic.cpp", directory: "/tmp")
-!5 = !DISubprogram(name: "test", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = distinct !DISubprogram(name: "test", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!6 = !DILocation(line: 100, column: 1, scope: !5)
!7 = !DILocation(line: 101, column: 1, scope: !5)
!8 = !DILocation(line: 102, column: 1, scope: !5)
diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll b/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll
new file mode 100644
index 000000000000..bf61112a3c3e
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll
@@ -0,0 +1,156 @@
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=bonaire -loop-reduce < %s | FileCheck -check-prefix=OPT %s
+
+; Test that loops with different maximum offsets for different address
+; spaces are correctly handled.
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_i32(
+; OPT: {{^}}.lr.ph:
+; OPT: %lsr.iv2 = phi i8 addrspace(1)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
+; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv2, i64 4095
+; OPT: load i8, i8 addrspace(1)* %scevgep4, align 1
+define void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
+bb:
+ %tmp = icmp sgt i32 %n, 0
+ br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %bb
+ br label %.lr.ph
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %bb
+ ret void
+
+.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+ %tmp1 = add nuw nsw i64 %indvars.iv, 4095
+ %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1
+ %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
+ %tmp4 = sext i8 %tmp3 to i32
+ %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
+ %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
+ %tmp7 = add nsw i32 %tmp6, %tmp4
+ store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+}
+
+; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_p1_i32(
+; OPT: {{^}}.lr.ph.preheader:
+; OPT: %scevgep2 = getelementptr i8, i8 addrspace(1)* %arg1, i64 4096
+; OPT: br label %.lr.ph
+
+; OPT: {{^}}.lr.ph:
+; OPT: %lsr.iv3 = phi i8 addrspace(1)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
+; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv3, i64 1
+define void @test_global_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
+bb:
+ %tmp = icmp sgt i32 %n, 0
+ br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %bb
+ br label %.lr.ph
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %bb
+ ret void
+
+.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+ %tmp1 = add nuw nsw i64 %indvars.iv, 4096
+ %tmp2 = getelementptr inbounds i8, i8 addrspace(1)* %arg1, i64 %tmp1
+ %tmp3 = load i8, i8 addrspace(1)* %tmp2, align 1
+ %tmp4 = sext i8 %tmp3 to i32
+ %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
+ %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
+ %tmp7 = add nsw i32 %tmp6, %tmp4
+ store i32 %tmp7, i32 addrspace(1)* %tmp5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+}
+
+; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_i32(
+; OPT: {{^}}.lr.ph
+; OPT: %lsr.iv2 = phi i8 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
+; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv2, i32 65535
+; OPT: %tmp4 = load i8, i8 addrspace(3)* %scevgep4, align 1
+define void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
+bb:
+ %tmp = icmp sgt i32 %n, 0
+ br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %bb
+ br label %.lr.ph
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %bb
+ ret void
+
+.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+ %tmp1 = add nuw nsw i64 %indvars.iv, 65535
+ %tmp2 = trunc i64 %tmp1 to i32
+ %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2
+ %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1
+ %tmp5 = sext i8 %tmp4 to i32
+ %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
+ %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
+ %tmp8 = add nsw i32 %tmp7, %tmp5
+ store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+}
+
+; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_p1_i32(
+; OPT: {{^}}.lr.ph.preheader:
+; OPT: %scevgep2 = getelementptr i8, i8 addrspace(3)* %arg1, i32 65536
+; OPT: br label %.lr.ph
+
+; OPT: {{^}}.lr.ph:
+; OPT: %lsr.iv3 = phi i8 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
+; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv3, i32 1
+define void @test_local_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
+bb:
+ %tmp = icmp sgt i32 %n, 0
+ br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge
+
+.lr.ph.preheader: ; preds = %bb
+ br label %.lr.ph
+
+._crit_edge.loopexit: ; preds = %.lr.ph
+ br label %._crit_edge
+
+._crit_edge: ; preds = %._crit_edge.loopexit, %bb
+ ret void
+
+.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
+ %tmp1 = add nuw nsw i64 %indvars.iv, 65536
+ %tmp2 = trunc i64 %tmp1 to i32
+ %tmp3 = getelementptr inbounds i8, i8 addrspace(3)* %arg1, i32 %tmp2
+ %tmp4 = load i8, i8 addrspace(3)* %tmp3, align 1
+ %tmp5 = sext i8 %tmp4 to i32
+ %tmp6 = getelementptr inbounds i32, i32 addrspace(1)* %arg0, i64 %indvars.iv
+ %tmp7 = load i32, i32 addrspace(1)* %tmp6, align 4
+ %tmp8 = add nsw i32 %tmp7, %tmp5
+ store i32 %tmp8, i32 addrspace(1)* %tmp6, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hawaii" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg b/test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..6baccf05fff0
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll b/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll
new file mode 100644
index 000000000000..bd80302a68b8
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll
@@ -0,0 +1,113 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -print-lsr-output < %s 2>&1 | FileCheck %s
+
+; Test various conditions where OptimizeLoopTermCond doesn't look at a
+; memory instruction use and fails to find the address space.
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+; CHECK-LABEL: @local_cmp_user(
+; CHECK: bb11:
+; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ -2, %entry ]
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ undef, %entry ]
+
+; CHECK: bb:
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
+; CHECK: %scevgep = getelementptr i8, i8 addrspace(3)* %t, i32 %lsr.iv.next2
+; CHECK: %c1 = icmp ult i8 addrspace(3)* %scevgep, undef
+; %p is a GEP into the addrspace(3) pointer %t, indexed by %i << 1 (i32).
+; Its only use is the pointer comparison %c1 that controls the back edge;
+; no load or store goes through %p.
+define void @local_cmp_user() nounwind {
+entry:
+ br label %bb11
+
+bb11:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
+ %ii = shl i32 %i, 1
+ %c0 = icmp eq i32 %i, undef
+ br i1 %c0, label %bb13, label %bb
+
+bb:
+ %t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef
+ %p = getelementptr i8, i8 addrspace(3)* %t, i32 %ii
+ %c1 = icmp ult i8 addrspace(3)* %p, undef
+ %i.next = add i32 %i, 1
+ br i1 %c1, label %bb11, label %bb13
+
+bb13:
+ unreachable
+}
+
+; CHECK-LABEL: @global_cmp_user(
+; CHECK: %lsr.iv.next = add i64 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i64 %lsr.iv1, 2
+; CHECK: %scevgep = getelementptr i8, i8 addrspace(1)* %t, i64 %lsr.iv.next2
+; %p is a GEP into the addrspace(1) pointer %t, indexed by %i << 1 where the
+; induction variable is i64. Its only use is the pointer comparison %c1 that
+; controls the back edge; no load or store goes through %p.
+define void @global_cmp_user() nounwind {
+entry:
+ br label %bb11
+
+bb11:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %bb ]
+ %ii = shl i64 %i, 1
+ %c0 = icmp eq i64 %i, undef
+ br i1 %c0, label %bb13, label %bb
+
+bb:
+ %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
+ %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii
+ %c1 = icmp ult i8 addrspace(1)* %p, undef
+ %i.next = add i64 %i, 1
+ br i1 %c1, label %bb11, label %bb13
+
+bb13:
+ unreachable
+}
+
+; CHECK-LABEL: @global_gep_user(
+; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i32 %lsr.iv1
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
+; %p is a GEP into the addrspace(1) pointer %t, indexed by the i32 value
+; %i << 1. Its only use is the pointer comparison %c1 that controls the back
+; edge; no load or store goes through %p.
+define void @global_gep_user() nounwind {
+entry:
+ br label %bb11
+
+bb11:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
+ %ii = shl i32 %i, 1
+ %c0 = icmp eq i32 %i, undef
+ br i1 %c0, label %bb13, label %bb
+
+bb:
+ %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
+ %p = getelementptr i8, i8 addrspace(1)* %t, i32 %ii
+ %c1 = icmp ult i8 addrspace(1)* %p, undef
+ %i.next = add i32 %i, 1
+ br i1 %c1, label %bb11, label %bb13
+
+bb13:
+ unreachable
+}
+
+; CHECK-LABEL: @global_sext_scale_user(
+; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
+; The i32 value %i << 1 is sign-extended to i64 in bb11, and that extended
+; value indexes the addrspace(1) GEP %p in bb. %p is used only by the pointer
+; comparison %c1 that controls the back edge.
+define void @global_sext_scale_user() nounwind {
+entry:
+ br label %bb11
+
+bb11:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
+ %ii = shl i32 %i, 1
+ %ii.ext = sext i32 %ii to i64
+ %c0 = icmp eq i32 %i, undef
+ br i1 %c0, label %bb13, label %bb
+
+bb:
+ %t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
+ %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
+ %c1 = icmp ult i8 addrspace(1)* %p, undef
+ %i.next = add i32 %i, 1
+ br i1 %c1, label %bb11, label %bb13
+
+bb13:
+ unreachable
+}
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index 2ad6c2ea52da..788842101080 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -239,33 +239,33 @@ define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i
%counter.04 = phi i32 [ 0, %.lr.ph ], [ %44, %11 ]
%result.03 = phi <16 x i8> [ zeroinitializer, %.lr.ph ], [ %41, %11 ]
%.012 = phi <16 x i8>* [ %data, %.lr.ph ], [ %43, %11 ]
- %12 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %.05, i32 1) nounwind
+ %12 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %.05, i32 1) nounwind
%13 = getelementptr inbounds i8, i8* %.05, i32 %ref_stride
- %14 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %13, i32 1) nounwind
+ %14 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %13, i32 1) nounwind
%15 = shufflevector <1 x i64> %12, <1 x i64> %14, <2 x i32> <i32 0, i32 1>
%16 = bitcast <2 x i64> %15 to <16 x i8>
%17 = getelementptr inbounds <16 x i8>, <16 x i8>* %.012, i32 1
store <16 x i8> %16, <16 x i8>* %.012, align 4
%18 = getelementptr inbounds i8, i8* %.05, i32 %2
- %19 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %18, i32 1) nounwind
+ %19 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %18, i32 1) nounwind
%20 = getelementptr inbounds i8, i8* %.05, i32 %3
- %21 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %20, i32 1) nounwind
+ %21 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %20, i32 1) nounwind
%22 = shufflevector <1 x i64> %19, <1 x i64> %21, <2 x i32> <i32 0, i32 1>
%23 = bitcast <2 x i64> %22 to <16 x i8>
%24 = getelementptr inbounds <16 x i8>, <16 x i8>* %.012, i32 2
store <16 x i8> %23, <16 x i8>* %17, align 4
%25 = getelementptr inbounds i8, i8* %.05, i32 %4
- %26 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %25, i32 1) nounwind
+ %26 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %25, i32 1) nounwind
%27 = getelementptr inbounds i8, i8* %.05, i32 %5
- %28 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %27, i32 1) nounwind
+ %28 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %27, i32 1) nounwind
%29 = shufflevector <1 x i64> %26, <1 x i64> %28, <2 x i32> <i32 0, i32 1>
%30 = bitcast <2 x i64> %29 to <16 x i8>
%31 = getelementptr inbounds <16 x i8>, <16 x i8>* %.012, i32 3
store <16 x i8> %30, <16 x i8>* %24, align 4
%32 = getelementptr inbounds i8, i8* %.05, i32 %6
- %33 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %32, i32 1) nounwind
+ %33 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %32, i32 1) nounwind
%34 = getelementptr inbounds i8, i8* %.05, i32 %7
- %35 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %34, i32 1) nounwind
+ %35 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %34, i32 1) nounwind
%36 = shufflevector <1 x i64> %33, <1 x i64> %35, <2 x i32> <i32 0, i32 1>
%37 = bitcast <2 x i64> %36 to <16 x i8>
store <16 x i8> %37, <16 x i8>* %31, align 4
@@ -290,7 +290,7 @@ define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i
ret void
}
-declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8*, i32) nounwind readonly
; Handle chains in which the same offset is used for both loads and
; stores to the same array.
@@ -328,32 +328,32 @@ for.body: ; preds = %for.body, %entry
%i.0110 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%src.addr = phi i8* [ %src, %entry ], [ %add.ptr45, %for.body ]
%add.ptr = getelementptr inbounds i8, i8* %src.addr, i32 %idx.neg
- %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr, i32 1)
+ %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr, i32 1)
%add.ptr3 = getelementptr inbounds i8, i8* %src.addr, i32 %idx.neg2
- %vld2 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr3, i32 1)
+ %vld2 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr3, i32 1)
%add.ptr7 = getelementptr inbounds i8, i8* %src.addr, i32 %idx.neg6
- %vld3 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr7, i32 1)
+ %vld3 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr7, i32 1)
%add.ptr11 = getelementptr inbounds i8, i8* %src.addr, i32 %idx.neg10
- %vld4 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr11, i32 1)
- %vld5 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %src.addr, i32 1)
+ %vld4 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr11, i32 1)
+ %vld5 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %src.addr, i32 1)
%add.ptr17 = getelementptr inbounds i8, i8* %src.addr, i32 %stride
- %vld6 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr17, i32 1)
+ %vld6 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr17, i32 1)
%add.ptr20 = getelementptr inbounds i8, i8* %src.addr, i32 %mul5
- %vld7 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr20, i32 1)
+ %vld7 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr20, i32 1)
%add.ptr23 = getelementptr inbounds i8, i8* %src.addr, i32 %mul1
- %vld8 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %add.ptr23, i32 1)
+ %vld8 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %add.ptr23, i32 1)
%vadd1 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld1, <8 x i8> %vld2) nounwind
%vadd2 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld2, <8 x i8> %vld3) nounwind
%vadd3 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld3, <8 x i8> %vld4) nounwind
%vadd4 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld4, <8 x i8> %vld5) nounwind
%vadd5 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld5, <8 x i8> %vld6) nounwind
%vadd6 = tail call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %vld6, <8 x i8> %vld7) nounwind
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr3, <8 x i8> %vadd1, i32 1)
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr7, <8 x i8> %vadd2, i32 1)
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr11, <8 x i8> %vadd3, i32 1)
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %src.addr, <8 x i8> %vadd4, i32 1)
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr17, <8 x i8> %vadd5, i32 1)
- tail call void @llvm.arm.neon.vst1.v8i8(i8* %add.ptr20, <8 x i8> %vadd6, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %add.ptr3, <8 x i8> %vadd1, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %add.ptr7, <8 x i8> %vadd2, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %add.ptr11, <8 x i8> %vadd3, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %src.addr, <8 x i8> %vadd4, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %add.ptr17, <8 x i8> %vadd5, i32 1)
+ tail call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %add.ptr20, <8 x i8> %vadd6, i32 1)
%inc = add nsw i32 %i.0110, 1
%add.ptr45 = getelementptr inbounds i8, i8* %src.addr, i32 8
%exitcond = icmp eq i32 %inc, 4
@@ -363,8 +363,8 @@ for.end: ; preds = %for.body
ret void
}
-declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst1.p0i8.v8i8(i8*, <8 x i8>, i32) nounwind
declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg b/test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg
new file mode 100644
index 000000000000..2cb98eb371b2
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg
@@ -0,0 +1,2 @@
+# Set config.unsupported when 'NVPTX' is not listed in config.root.targets.
+if not 'NVPTX' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll b/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll
new file mode 100644
index 000000000000..a16065b4dfbd
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; This confirms that NVPTXTTI considers a 64-to-32 integer trunc free. If such
+; truncs were not considered free, LSR would promote (int)i as a separate
+; induction variable in the following example.
+;
+; for (long i = begin; i != end; i += stride)
+; use((int)i);
+;
+; That would be worthless, because "i" is simulated by two 32-bit registers and
+; truncating it to 32-bit is as simple as directly using the register that
+; contains the low bits.
+define void @trunc_is_free(i64 %begin, i64 %stride, i64 %end) {
+; CHECK-LABEL: @trunc_is_free(
+entry:
+ %cmp.4 = icmp eq i64 %begin, %end
+ br i1 %cmp.4, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+; CHECK: for.body:
+ %i.05 = phi i64 [ %add, %for.body ], [ %begin, %for.body.preheader ]
+; The i64 induction variable is truncated inside the loop body and the i32
+; result is the only value passed to @_Z3usei; the expectation below requires
+; a trunc from i64 to i32 to still be present in for.body.
+ %conv = trunc i64 %i.05 to i32
+; CHECK: trunc i64 %{{[^ ]+}} to i32
+ tail call void @_Z3usei(i32 %conv) #2
+ %add = add nsw i64 %i.05, %stride
+ %cmp = icmp eq i64 %add, %end
+ br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+declare void @_Z3usei(i32)
+
+!nvvm.annotations = !{!0}
+!0 = !{void (i64, i64, i64)* @trunc_is_free, !"kernel", i32 1}
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
index 24be0dc42d6d..7925bf01020e 100644
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
@@ -23,7 +23,7 @@
; X32: add
; X32: add
; X32: add
-; X32: leal
+; X32: add
; X32: %for.body.3
define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/funclet.ll b/test/Transforms/LoopStrengthReduce/funclet.ll
new file mode 100644
index 000000000000..5d20646141c4
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/funclet.ll
@@ -0,0 +1,216 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+declare i32 @_except_handler3(...)
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @external(i32*)
+declare void @reserve()
+
+; %phi2 is defined in the catchswitch block %pad and reaches the loop through
+; the cleanuppad block %blah2. The expectations that follow the function
+; require the ptrtoint of %phi2 to come directly after the cleanuppad
+; instruction.
+define void @f() personality i32 (...)* @_except_handler3 {
+entry:
+ br label %throw
+
+throw: ; preds = %throw, %entry
+ %tmp96 = getelementptr inbounds i8, i8* undef, i32 1
+ invoke void @reserve()
+ to label %throw unwind label %pad
+
+pad: ; preds = %throw
+ %phi2 = phi i8* [ %tmp96, %throw ]
+ %cs = catchswitch within none [label %unreachable] unwind label %blah2
+
+unreachable:
+ catchpad within %cs []
+ unreachable
+
+blah2:
+ %cleanuppadi4.i.i.i = cleanuppad within none []
+ br label %loop_body
+
+loop_body: ; preds = %iter, %blah2
+ %tmp99 = phi i8* [ %tmp101, %iter ], [ %phi2, %blah2 ]
+ %tmp100 = icmp eq i8* %tmp99, undef
+ br i1 %tmp100, label %unwind_out, label %iter
+
+iter: ; preds = %loop_body
+ %tmp101 = getelementptr inbounds i8, i8* %tmp99, i32 1
+ br i1 undef, label %unwind_out, label %loop_body
+
+unwind_out: ; preds = %iter, %loop_body
+ cleanupret from %cleanuppadi4.i.i.i unwind to caller
+}
+
+; CHECK-LABEL: define void @f(
+; CHECK: cleanuppad within none []
+; CHECK-NEXT: ptrtoint i8* %phi2 to i32
+
+; %phi2 is defined in the catchswitch block %pad and the loop is entered from
+; the catchpad block %blah. The expectations that follow the function require
+; the ptrtoint of %phi2 to come directly after the catchpad instruction at
+; the top of %blah.
+define void @g() personality i32 (...)* @_except_handler3 {
+entry:
+ br label %throw
+
+throw: ; preds = %throw, %entry
+ %tmp96 = getelementptr inbounds i8, i8* undef, i32 1
+ invoke void @reserve()
+ to label %throw unwind label %pad
+
+pad:
+ %phi2 = phi i8* [ %tmp96, %throw ]
+ %cs = catchswitch within none [label %unreachable, label %blah] unwind to caller
+
+unreachable:
+ catchpad within %cs []
+ unreachable
+
+blah:
+ %catchpad = catchpad within %cs []
+ br label %loop_body
+
+unwind_out:
+ catchret from %catchpad to label %leave
+
+leave:
+ ret void
+
+loop_body: ; preds = %iter, %blah
+ %tmp99 = phi i8* [ %tmp101, %iter ], [ %phi2, %blah ]
+ %tmp100 = icmp eq i8* %tmp99, undef
+ br i1 %tmp100, label %unwind_out, label %iter
+
+iter: ; preds = %loop_body
+ %tmp101 = getelementptr inbounds i8, i8* %tmp99, i32 1
+ br i1 undef, label %unwind_out, label %loop_body
+}
+
+; CHECK-LABEL: define void @g(
+; CHECK: blah:
+; CHECK-NEXT: catchpad within %cs []
+; CHECK-NEXT: ptrtoint i8* %phi2 to i32
+
+
+; Here %phi2 is a phi placed in the catchpad block %blug itself, ahead of the
+; catchpad instruction. The expectations that follow the function require the
+; ptrtoint of %phi2 to come directly after that catchpad instruction.
+define void @h() personality i32 (...)* @_except_handler3 {
+entry:
+ br label %throw
+
+throw: ; preds = %throw, %entry
+ %tmp96 = getelementptr inbounds i8, i8* undef, i32 1
+ invoke void @reserve()
+ to label %throw unwind label %pad
+
+pad:
+ %cs = catchswitch within none [label %unreachable, label %blug] unwind to caller
+
+unreachable:
+ catchpad within %cs []
+ unreachable
+
+blug:
+ %phi2 = phi i8* [ %tmp96, %pad ]
+ %catchpad = catchpad within %cs []
+ br label %loop_body
+
+unwind_out:
+ catchret from %catchpad to label %leave
+
+leave:
+ ret void
+
+loop_body: ; preds = %iter, %blug
+ %tmp99 = phi i8* [ %tmp101, %iter ], [ %phi2, %blug ]
+ %tmp100 = icmp eq i8* %tmp99, undef
+ br i1 %tmp100, label %unwind_out, label %iter
+
+iter: ; preds = %loop_body
+ %tmp101 = getelementptr inbounds i8, i8* %tmp99, i32 1
+ br i1 undef, label %unwind_out, label %loop_body
+}
+
+; CHECK-LABEL: define void @h(
+; CHECK: blug:
+; CHECK: catchpad within %cs []
+; CHECK-NEXT: ptrtoint i8* %phi2 to i32
+
+; %phi2 is defined in the catchswitch block %catchpad. The loop is reached
+; through %loop_head, which is entered from both the catchpad block %cp_body
+; and the cleanuppad block %cleanuppad. The expectations that follow the
+; function only require a ptrtoint of %phi2 to appear after the function's
+; label.
+define void @i() personality i32 (...)* @_except_handler3 {
+entry:
+ br label %throw
+
+throw: ; preds = %throw, %entry
+ %tmp96 = getelementptr inbounds i8, i8* undef, i32 1
+ invoke void @reserve()
+ to label %throw unwind label %catchpad
+
+catchpad: ; preds = %throw
+ %phi2 = phi i8* [ %tmp96, %throw ]
+ %cs = catchswitch within none [label %cp_body] unwind label %cleanuppad
+
+cp_body:
+ catchpad within %cs []
+ br label %loop_head
+
+cleanuppad:
+ cleanuppad within none []
+ br label %loop_head
+
+loop_head:
+ br label %loop_body
+
+loop_body: ; preds = %iter, %loop_head
+ %tmp99 = phi i8* [ %tmp101, %iter ], [ %phi2, %loop_head ]
+ %tmp100 = icmp eq i8* %tmp99, undef
+ br i1 %tmp100, label %unwind_out, label %iter
+
+iter: ; preds = %loop_body
+ %tmp101 = getelementptr inbounds i8, i8* %tmp99, i32 1
+ br i1 undef, label %unwind_out, label %loop_body
+
+unwind_out: ; preds = %iter, %loop_body
+ unreachable
+}
+
+; CHECK-LABEL: define void @i(
+; CHECK: ptrtoint i8* %phi2 to i32
+
+; %b is both the start value of the pointer induction variable %d.0 and an
+; incoming value of the phi %e.0 in %catch.dispatch.2, which is a catchswitch
+; block. The expectations that follow the function require both phis to
+; appear in the output exactly as written here.
+define void @test1(i32* %b, i32* %c) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %d.0 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.inc ]
+ invoke void @external(i32* %d.0)
+ to label %for.inc unwind label %catch.dispatch
+
+for.inc: ; preds = %for.cond
+ %incdec.ptr = getelementptr inbounds i32, i32* %d.0, i32 1
+ br label %for.cond
+
+catch.dispatch: ; preds = %for.cond
+ %cs = catchswitch within none [label %catch] unwind label %catch.dispatch.2
+
+catch: ; preds = %catch.dispatch
+ %0 = catchpad within %cs [i8* null, i32 64, i8* null]
+ catchret from %0 to label %try.cont
+
+try.cont: ; preds = %catch
+ invoke void @external(i32* %c)
+ to label %try.cont.7 unwind label %catch.dispatch.2
+
+catch.dispatch.2: ; preds = %try.cont, %catch.dispatch
+ %e.0 = phi i32* [ %c, %try.cont ], [ %b, %catch.dispatch ]
+ %cs2 = catchswitch within none [label %catch.4] unwind to caller
+
+catch.4: ; preds = %catch.dispatch.2
+ catchpad within %cs2 [i8* null, i32 64, i8* null]
+ unreachable
+
+try.cont.7: ; preds = %try.cont
+ ret void
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: for.cond:
+; CHECK: %d.0 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.inc ]
+
+; CHECK: catch.dispatch.2:
+; CHECK: %e.0 = phi i32* [ %c, %try.cont ], [ %b, %catch.dispatch ]
diff --git a/test/Transforms/LoopStrengthReduce/pr12018.ll b/test/Transforms/LoopStrengthReduce/pr12018.ll
index b15961a77904..bb5d1654fada 100644
--- a/test/Transforms/LoopStrengthReduce/pr12018.ll
+++ b/test/Transforms/LoopStrengthReduce/pr12018.ll
@@ -16,7 +16,7 @@ for.body: ; preds = %_ZN8nsTArray9Elemen
%tmp = bitcast %struct.nsTArrayHeader* %add.ptr.i to %struct.nsTArray*
%arrayidx = getelementptr inbounds %struct.nsTArray, %struct.nsTArray* %tmp, i32 %i.06
%add = add nsw i32 %i.06, 1
- call void @llvm.dbg.value(metadata %struct.nsTArray* %aValues, i64 0, metadata !0, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !DISubprogram())
+ call void @llvm.dbg.value(metadata %struct.nsTArray* %aValues, i64 0, metadata !0, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !1)
br label %_ZN8nsTArray9ElementAtEi.exit
_ZN8nsTArray9ElementAtEi.exit: ; preds = %for.body
@@ -35,4 +35,5 @@ declare %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev()
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, scope: !DISubprogram())
+!0 = !DILocalVariable(scope: !1)
+!1 = distinct !DISubprogram()
diff --git a/test/Transforms/LoopStrengthReduce/pr25541.ll b/test/Transforms/LoopStrengthReduce/pr25541.ll
new file mode 100644
index 000000000000..011998b90893
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr25541.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; for.cond.i advances an i32* (starting at null) by one element each time @g
+; returns normally. If @g unwinds, the catchpad block compares that pointer
+; against null once, and for.cond.1.i keeps calling @g for as long as that
+; comparison is false. The expectations that follow the function look for an
+; i64 phi starting at 0 that is converted with inttoptr before the compare
+; and is stepped by -4.
+define void @f() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ br label %for.cond.i
+
+for.cond.i: ; preds = %for.inc.i, %entry
+ %_First.addr.0.i = phi i32* [ null, %entry ], [ %incdec.ptr.i, %for.inc.i ]
+ invoke void @g()
+ to label %for.inc.i unwind label %catch.dispatch.i
+
+catch.dispatch.i: ; preds = %for.cond.i
+ %cs = catchswitch within none [label %for.cond.1.preheader.i] unwind to caller
+
+for.cond.1.preheader.i: ; preds = %catch.dispatch.i
+ %0 = catchpad within %cs [i8* null, i32 64, i8* null]
+ %cmp.i = icmp eq i32* %_First.addr.0.i, null
+ br label %for.cond.1.i
+
+for.cond.1.i: ; preds = %for.body.i, %for.cond.1.preheader.i
+ br i1 %cmp.i, label %for.end.i, label %for.body.i
+
+for.body.i: ; preds = %for.cond.1.i
+ call void @g()
+ br label %for.cond.1.i
+
+for.inc.i: ; preds = %for.cond.i
+ %incdec.ptr.i = getelementptr inbounds i32, i32* %_First.addr.0.i, i64 1
+ br label %for.cond.i
+
+for.end.i: ; preds = %for.cond.1.i
+ catchret from %0 to label %leave
+
+leave: ; preds = %for.end.i
+ ret void
+}
+
+; CHECK-LABEL: define void @f(
+; CHECK: %[[PHI:.*]] = phi i64 [ %[[IV_NEXT:.*]], {{.*}} ], [ 0, {{.*}} ]
+; CHECK: %[[ITOP:.*]] = inttoptr i64 %[[PHI]] to i32*
+; CHECK: %[[CMP:.*]] = icmp eq i32* %[[ITOP]], null
+; CHECK: %[[IV_NEXT]] = add i64 %[[PHI]], -4
+
+declare void @g()
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
index 483becc0e7b8..c6d6690e4302 100644
--- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
+++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
@@ -28,7 +28,7 @@ exit:
; sure they aren't marked as post-inc users.
;
; CHECK-LABEL: IV Users for loop %test2.loop
-; CHECK: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us)),+,33554432}<%test2.loop> in %f = ashr i32 %sext.us, 24
+; CHECK: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us))<nuw><nsw>,+,33554432}<%test2.loop> in %f = ashr i32 %sext.us, 24
define i32 @test2() {
entry:
br label %test2.loop
diff --git a/test/Transforms/LoopStrengthReduce/sext-ind-var.ll b/test/Transforms/LoopStrengthReduce/sext-ind-var.ll
new file mode 100644
index 000000000000..3cf8f536fa71
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/sext-ind-var.ll
@@ -0,0 +1,140 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+; LSR was previously unable to generate a float* induction variable in
+; these cases because scalar evolution did not propagate nsw from an
+; instruction to its SCEV, which prevented the sext from being distributed
+; into the corresponding addrec.
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[i + offset];
+;
+define float @testadd(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @testadd
+; CHECK: sext i32 %offset to i64
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+; Expected output shape: the sext of %offset appears before the loop, the
+; loop carries a float* phi and an i32 phi, and no sext follows those phis.
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = add nuw nsw i32 %i, %offset
+ %index64 = sext i32 %index32 to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[i - offset];
+;
+define float @testsub(float* %input, i32 %offset, i32 %numIterations) {
+; CHECK-LABEL: @testsub
+; CHECK: sub i32 0, %offset
+; CHECK: sext i32
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+; Expected output shape: %offset is negated and an i32 value is sign-extended
+; before the loop, the loop carries a float* phi and an i32 phi, and no sext
+; follows those phis.
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = sub nuw nsw i32 %i, %offset
+ %index64 = sext i32 %index32 to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[i * stride];
+;
+define float @testmul(float* %input, i32 %stride, i32 %numIterations) {
+; CHECK-LABEL: @testmul
+; CHECK: sext i32 %stride to i64
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+; Expected output shape: the sext of %stride appears before the loop, the
+; loop carries a float* phi and an i32 phi, and no sext follows those phis.
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = mul nuw nsw i32 %i, %stride
+ %index64 = sext i32 %index32 to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}
+
+; Test this pattern:
+;
+; for (int i = 0; i < numIterations; ++i)
+; sum += ptr[3 * (i << 7)];
+;
+; The multiplication by 3 is to make the address calculation expensive
+; enough to force the introduction of a pointer induction variable.
+define float @testshl(float* %input, i32 %numIterations) {
+; CHECK-LABEL: @testshl
+; CHECK: loop:
+; CHECK-DAG: phi float*
+; CHECK-DAG: phi i32
+; CHECK-NOT: sext
+; Expected output shape: the loop carries a float* phi and an i32 phi, and no
+; sext follows those phis.
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ %nexti, %loop ], [ 0, %entry ]
+ %sum = phi float [ %nextsum, %loop ], [ 0.000000e+00, %entry ]
+ %index32 = shl nuw nsw i32 %i, 7
+ %index32mul = mul nuw nsw i32 %index32, 3
+ %index64 = sext i32 %index32mul to i64
+ %ptr = getelementptr inbounds float, float* %input, i64 %index64
+ %addend = load float, float* %ptr, align 4
+ %nextsum = fadd float %sum, %addend
+ %nexti = add nuw nsw i32 %i, 1
+ %exitcond = icmp eq i32 %nexti, %numIterations
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret float %nextsum
+}
diff --git a/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg b/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..6baccf05fff0
--- /dev/null
+++ b/test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg
@@ -0,0 +1,3 @@
+# Set config.unsupported when 'AMDGPU' is not listed in config.root.targets.
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll b/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll
new file mode 100644
index 000000000000..3cbb70274da5
--- /dev/null
+++ b/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll
@@ -0,0 +1,33 @@
+; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -S < %s | FileCheck %s
+
+; CHECK-LABEL: @test_unroll_convergent_barrier(
+; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK-NOT: br
+; The counter starts at 0 and the loop exits when the incremented counter
+; equals 4, so the body, including the call to the barrier intrinsic (declared
+; with the convergent attribute group #1), executes four times. Each
+; iteration loads %in[i], adds it to the running sum, and stores the sum to
+; %out[i].
+define void @test_unroll_convergent_barrier(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(1)* noalias nocapture %in) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx.in = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %indvars.iv
+ %arrayidx.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %indvars.iv
+ %load = load i32, i32 addrspace(1)* %arrayidx.in
+ call void @llvm.AMDGPU.barrier.global() #1
+ %add = add i32 %load, %sum.02
+ store i32 %add, i32 addrspace(1)* %arrayidx.out
+ %indvars.iv.next = add i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next, 4
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+declare void @llvm.AMDGPU.barrier.global() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind convergent }
diff --git a/test/Transforms/LoopUnroll/X86/partial.ll b/test/Transforms/LoopUnroll/X86/partial.ll
index 4566f792deb4..104a38779e52 100644
--- a/test/Transforms/LoopUnroll/X86/partial.ll
+++ b/test/Transforms/LoopUnroll/X86/partial.ll
@@ -86,17 +86,20 @@ for.body: ; preds = %entry, %for.body
%reduction.026 = phi i16 [ %add14, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i16, i16* %arr, i64 %indvars.iv
%0 = load i16, i16* %arrayidx, align 2
- %add = add i16 %0, %reduction.026
+ %mul = shl i16 %0, 1
+ %add = add i16 %mul, %reduction.026
%sext = mul i64 %indvars.iv, 12884901888
%idxprom3 = ashr exact i64 %sext, 32
%arrayidx4 = getelementptr inbounds i16, i16* %arr, i64 %idxprom3
%1 = load i16, i16* %arrayidx4, align 2
- %add7 = add i16 %add, %1
+ %mul2 = shl i16 %1, 1
+ %add7 = add i16 %add, %mul2
%sext28 = mul i64 %indvars.iv, 21474836480
%idxprom10 = ashr exact i64 %sext28, 32
%arrayidx11 = getelementptr inbounds i16, i16* %arr, i64 %idxprom10
%2 = load i16, i16* %arrayidx11, align 2
- %add14 = add i16 %add7, %2
+ %mul3 = shl i16 %2, 1
+ %add14 = add i16 %add7, %mul3
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
diff --git a/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll b/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll
deleted file mode 100644
index ac814526647e..000000000000
--- a/test/Transforms/LoopUnroll/full-unroll-bad-geps.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; Check that we don't crash on corner cases.
-; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @foo1() {
-entry:
- br label %for.body
-
-for.body:
- %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %idx = zext i32 undef to i64
- %add.ptr = getelementptr inbounds i64, i64* null, i64 %idx
- %inc = add nuw nsw i64 %phi, 1
- %cmp = icmp ult i64 %inc, 999
- br i1 %cmp, label %for.body, label %for.exit
-
-for.exit:
- ret void
-}
-
-define void @foo2() {
-entry:
- br label %for.body
-
-for.body:
- %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %x = getelementptr i32, <4 x i32*> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %inc = add nuw nsw i64 %phi, 1
- %cmp = icmp ult i64 %inc, 999
- br i1 %cmp, label %for.body, label %for.exit
-
-for.exit:
- ret void
-}
diff --git a/test/Transforms/LoopUnroll/full-unroll-crashers.ll b/test/Transforms/LoopUnroll/full-unroll-crashers.ll
new file mode 100644
index 000000000000..e932851042ad
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-crashers.ll
@@ -0,0 +1,102 @@
+; Check that we don't crash on corner cases.
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
+
+define void @foo1() {
+entry:
+ br label %for.body
+
+for.body:
+ %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %idx = zext i32 undef to i64
+ %add.ptr = getelementptr inbounds i64, i64* null, i64 %idx
+ %inc = add nuw nsw i64 %phi, 1
+ %cmp = icmp ult i64 %inc, 999
+ br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+ ret void
+}
+
+define void @foo2() {
+entry:
+ br label %for.body
+
+for.body:
+ %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %x = getelementptr i32, <4 x i32*> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %inc = add nuw nsw i64 %phi, 1
+ %cmp = icmp ult i64 %inc, 999
+ br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+ ret void
+}
+
+define void @cmp_undef() {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
+ %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+ %x1 = load i32, i32* %arrayidx1, align 4
+ %cmp = icmp eq i32 %x1, undef
+ br i1 %cmp, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %iv.1 = add nuw nsw i64 %iv.0, 1
+ %exitcond = icmp eq i64 %iv.1, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ ret void
+}
+
+define void @switch() {
+entry:
+ br label %for.body
+
+for.body:
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
+ %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+ %x1 = load i32, i32* %arrayidx1, align 4
+ switch i32 %x1, label %l1 [
+ ]
+
+l1:
+ %x2 = add i32 %x1, 2
+ br label %for.inc
+
+for.inc:
+ %iv.1 = add nuw nsw i64 %iv.0, 1
+ %exitcond = icmp eq i64 %iv.1, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define <4 x i32> @vec_load() {
+entry:
+ br label %for.body
+
+for.body:
+ %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %vec_phi = phi <4 x i32> [ <i32 0, i32 0, i32 0, i32 0>, %entry ], [ %r, %for.body ]
+ %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %phi
+ %bc = bitcast i32* %arrayidx to <4 x i32>*
+ %x = load <4 x i32>, < 4 x i32>* %bc, align 4
+ %r = add <4 x i32> %x, %vec_phi
+ %inc = add nuw nsw i64 %phi, 1
+ %cmp = icmp ult i64 %inc, 999
+ br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+ ret <4 x i32> %r
+}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
new file mode 100644
index 000000000000..5df48e8c380b
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@unknown_global = internal unnamed_addr global [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
+@weak_constant = weak unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
+
+; Though @unknown_global is initialized with constant values, we can't consider
+; it as a constant, so we shouldn't unroll the loop.
+; CHECK-LABEL: @foo
+; CHECK: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @unknown_global, i64 0, i64 %iv
+define i32 @foo(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i32 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @unknown_global, i64 0, i64 %iv
+ %const_array_element = load i32, i32* %array_const_idx, align 4
+ %mul = mul nsw i32 %src_element, %const_array_element
+ %add = add nsw i32 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, 9
+ br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end: ; preds = %loop
+ %r.lcssa = phi i32 [ %r, %loop ]
+ ret i32 %r.lcssa
+}
+
+; Similarly, we can't consider 'weak' symbols as a known constant value, so we
+; shouldn't unroll the loop.
+; CHECK-LABEL: @foo2
+; CHECK: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @weak_constant, i64 0, i64 %iv
+define i32 @foo2(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i32 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @weak_constant, i64 0, i64 %iv
+ %const_array_element = load i32, i32* %array_const_idx, align 4
+ %mul = mul nsw i32 %src_element, %const_array_element
+ %add = add nsw i32 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, 9
+ br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end: ; preds = %loop
+ %r.lcssa = phi i32 [ %r, %loop ]
+ ret i32 %r.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll
new file mode 100644
index 000000000000..cd8cfd75424f
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll
@@ -0,0 +1,97 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1], align 16
+
+; We should be able to propagate constant data through different types of
+; casts. For example, in this test we have a load, which becomes constant after
+; unrolling, which then is truncated to i8. Obviously, truncated value is also a
+; constant, which can be used in the further simplifications.
+;
+; We expect this loop to be unrolled, because in this case load would become
+; constant, which is 0 in many cases, and which, in its turn, helps to simplify
+; following multiplication and addition. In total, unrolling should help to
+; optimize ~60% of all instructions in this case.
+;
+; CHECK-LABEL: @const_load_trunc
+; CHECK-NOT: br i1
+; CHECK: ret i8 %
+define i8 @const_load_trunc(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i8 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
+ %const_array_element = load i32, i32* %array_const_idx, align 4
+ %x = trunc i32 %src_element to i8
+ %y = trunc i32 %const_array_element to i8
+ %mul = mul nsw i8 %x, %y
+ %add = add nsw i8 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, 10
+ br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end: ; preds = %loop
+ %r.lcssa = phi i8 [ %r, %loop ]
+ ret i8 %r.lcssa
+}
+
+; The same test as before, but with ZEXT instead of TRUNC.
+; CHECK-LABEL: @const_load_zext
+; CHECK-NOT: br i1
+; CHECK: ret i64 %
+define i64 @const_load_zext(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i64 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
+ %const_array_element = load i32, i32* %array_const_idx, align 4
+ %x = zext i32 %src_element to i64
+ %y = zext i32 %const_array_element to i64
+ %mul = mul nsw i64 %x, %y
+ %add = add nsw i64 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, 10
+ br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end: ; preds = %loop
+ %r.lcssa = phi i64 [ %r, %loop ]
+ ret i64 %r.lcssa
+}
+
+; The same test as the first one, but with SEXT instead of TRUNC.
+; CHECK-LABEL: @const_load_sext
+; CHECK-NOT: br i1
+; CHECK: ret i64 %
+define i64 @const_load_sext(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i64 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
+ %const_array_element = load i32, i32* %array_const_idx, align 4
+ %x = sext i32 %src_element to i64
+ %y = sext i32 %const_array_element to i64
+ %mul = mul nsw i64 %x, %y
+ %add = add nsw i64 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, 10
+ br i1 %exitcond86.i, label %loop.end, label %loop
+
+loop.end: ; preds = %loop
+ %r.lcssa = phi i64 [ %r, %loop ]
+ ret i64 %r.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
new file mode 100644
index 000000000000..f7758fa22008
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
@@ -0,0 +1,207 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
+
+; We should be able to propagate constant data through comparisons.
+; For example, in this test we have a load, which becomes constant after
+; unrolling, making comparison with 0 also known to be 0 (false) - and that
+; will trigger further simplifications.
+;
+; We expect this loop to be unrolled, because in this case load would become
+; constant, which is always 1, and which, in its turn, helps to simplify
+; following comparison, zero-extension, and addition. In total, unrolling should help to
+; optimize more than 50% of all instructions in this case.
+;
+; CHECK-LABEL: @const_compare
+; CHECK-NOT: br i1 %
+; CHECK: ret i32
+define i32 @const_compare(i32* noalias nocapture readonly %b) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
+ %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.body ]
+ %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+ %x1 = load i32, i32* %arrayidx1, align 4
+ %cmp = icmp eq i32 %x1, 0
+ %cast = zext i1 %cmp to i32
+ %iv.1 = add nuw nsw i64 %iv.0, 1
+ %r.1 = add i32 %r.0, %cast
+ %exitcond = icmp eq i64 %iv.1, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 %r.1
+}
+
+; If we can figure out result of comparison on each iteration, we can resolve
+; the depending branch. That means, that the unrolled version of the loop would
+; have less code, because we don't need not-taken basic blocks there.
+; This test checks that this is taken into consideration.
+; We expect this loop to be unrolled, because the most complicated part of its
+; body (if.then block) is never actually executed.
+; CHECK-LABEL: @branch_folded
+; CHECK-NOT: br i1 %
+; CHECK: ret i32
+define i32 @branch_folded(i32* noalias nocapture readonly %b) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
+ %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ]
+ %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+ %x1 = load i32, i32* %arrayidx1, align 4
+ %cmp = icmp eq i32 %x1, 0
+ %iv.1 = add nuw nsw i64 %iv.0, 1
+ br i1 %cmp, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv.0
+ %x2 = load i32, i32* %arrayidx2, align 4
+ %add = add nsw i32 %x2, %r.0
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %r.1 = phi i32 [ %add, %if.then ], [ %x1, %for.body ]
+ %exitcond = icmp eq i64 %iv.1, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ ret i32 %r.1
+}
+
+; This test is similar to the previous one, but in this we use IV in comparison
+; (not a loaded value as we did there).
+; CHECK-LABEL: @branch_iv
+; CHECK-NOT: br i1 %
+; CHECK: ret i64
+define i64 @branch_iv(i64* noalias nocapture readonly %b) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.inc ]
+ %r.030 = phi i64 [ 0, %entry ], [ %r.1, %for.inc ]
+ %cmp3 = icmp eq i64 %indvars.iv, 5
+ %tmp3 = add nuw nsw i64 %indvars.iv, 1
+ br i1 %cmp3, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 %tmp3
+ %tmp1 = load i64, i64* %arrayidx2, align 4
+ %add = add nsw i64 %tmp1, %r.030
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %r.1 = phi i64 [ %add, %if.then ], [ %r.030, %for.body ]
+ %exitcond = icmp eq i64 %tmp3, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ ret i64 %r.1
+}
+
+; Induction variables are often cast to another type, and that shouldn't
+; prevent us from folding branches. This test specifically checks if we can
+; handle this. Other than that, it's similar to the previous test.
+; CHECK-LABEL: @branch_iv_trunc
+; CHECK-NOT: br i1 %
+; CHECK: ret i32
+define i32 @branch_iv_trunc(i32* noalias nocapture readonly %b) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.inc ]
+ %r.030 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ]
+ %tmp2 = trunc i64 %indvars.iv to i32
+ %cmp3 = icmp eq i32 %tmp2, 5
+ %tmp3 = add nuw nsw i64 %indvars.iv, 1
+ br i1 %cmp3, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %tmp3
+ %tmp1 = load i32, i32* %arrayidx2, align 4
+ %add = add nsw i32 %tmp1, %r.030
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %r.1 = phi i32 [ %add, %if.then ], [ %r.030, %for.body ]
+ %exitcond = icmp eq i64 %tmp3, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ ret i32 %r.1
+}
+
+; Check that we don't crash when we analyze icmp with pointer-typed IV and a
+; pointer.
+; CHECK-LABEL: @ptr_cmp_crash
+; CHECK: ret void
+define void @ptr_cmp_crash() {
+entry:
+ br label %while.body
+
+while.body:
+ %iv.0 = phi i32* [ getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 0), %entry ], [ %iv.1, %while.body ]
+ %iv.1 = getelementptr inbounds i32, i32* %iv.0, i64 1
+ %exitcond = icmp eq i32* %iv.1, getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 9)
+ br i1 %exitcond, label %loop.exit, label %while.body
+
+loop.exit:
+ ret void
+}
+
+; Check that we don't crash when we analyze ptrtoint cast.
+; CHECK-LABEL: @ptrtoint_cast_crash
+; CHECK: ret void
+define void @ptrtoint_cast_crash(i8 * %a) {
+entry:
+ %limit = getelementptr i8, i8* %a, i64 512
+ br label %loop.body
+
+loop.body:
+ %iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ]
+ %cast = ptrtoint i8* %iv.0 to i64
+ %iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1
+ %exitcond = icmp ne i8* %iv.1, %limit
+ br i1 %exitcond, label %loop.body, label %loop.exit
+
+loop.exit:
+ ret void
+}
+
+; Loop unroller should be able to predict that a comparison would become
+; constant if the operands are pointers with the same base and constant
+; offsets.
+; We expect this loop to be unrolled, since most of its instructions would
+; become constant after it.
+; CHECK-LABEL: @ptr_cmp
+; CHECK-NOT: br i1 %
+; CHECK: ret i64
+define i64 @ptr_cmp(i8 * %a) {
+entry:
+ %limit = getelementptr i8, i8* %a, i64 40
+ %start.iv2 = getelementptr i8, i8* %a, i64 7
+ br label %loop.body
+
+loop.body:
+ %iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ]
+ %iv2.0 = phi i8* [ %start.iv2, %entry ], [ %iv2.1, %loop.body ]
+ %r.0 = phi i64 [ 0, %entry ], [ %r.1, %loop.body ]
+ %cast = ptrtoint i8* %iv.0 to i64
+ %cmp = icmp eq i8* %iv2.0, %iv.0
+ %sub = sext i1 %cmp to i64
+ %mul = mul i64 %sub, %cast
+ %r.1 = add i64 %r.0, %mul
+ %iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1
+ %iv2.1 = getelementptr inbounds i8, i8* %iv2.0, i64 1
+ %exitcond = icmp ne i8* %iv.1, %limit
+ br i1 %exitcond, label %loop.body, label %loop.exit
+
+loop.exit:
+ ret i64 %r.1
+}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
new file mode 100644
index 000000000000..dd8582e6877f
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define i64 @propagate_loop_phis() {
+; CHECK-LABEL: @propagate_loop_phis(
+; CHECK-NOT: br i1
+; CHECK: ret i64 3
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %x0 = phi i64 [ 0, %entry ], [ %x2, %loop ]
+ %x1 = or i64 %x0, 1
+ %x2 = or i64 %x1, 2
+ %inc = add nuw nsw i64 %iv, 1
+ %cond = icmp sge i64 %inc, 10
+ br i1 %cond, label %loop.end, label %loop
+
+loop.end:
+ %x.lcssa = phi i64 [ %x2, %loop ]
+ ret i64 %x.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/pr18861.ll b/test/Transforms/LoopUnroll/pr18861.ll
index 62f26106afb2..c01eef1d0e5d 100644
--- a/test/Transforms/LoopUnroll/pr18861.ll
+++ b/test/Transforms/LoopUnroll/pr18861.ll
@@ -2,42 +2,85 @@
@b = external global i32, align 4
+; Test that LoopUnroll does not break LCSSA form.
+;
+; In this function we have the following CFG:
+; ( entry )
+; |
+; v
+; ( outer.header ) <--
+; | \
+; v |
+; --> ( inner.header ) |
+; / / \ |
+; \ / \ |
+; \ v v /
+; ( inner.latch ) ( outer.latch )
+; |
+; v
+; ( exit )
+;
+; When the inner loop is unrolled, the inner.latch block has only one
+; predecessor and one successor, so it can be merged with exit block.
+; During the merge, however, we remove an LCSSA definition for
+; %storemerge1.lcssa, breaking LCSSA form for the outer loop.
+
; Function Attrs: nounwind uwtable
define void @fn1() #0 {
entry:
- br label %for.cond1thread-pre-split
-
-for.cond1thread-pre-split: ; preds = %for.inc8, %entry
- %storemerge1 = phi i32 [ 0, %entry ], [ %inc9, %for.inc8 ]
- br i1 undef, label %for.inc8, label %for.cond2.preheader.lr.ph
+ br label %outer.header
-for.cond2.preheader.lr.ph: ; preds = %for.cond1thread-pre-split
- br label %for.cond2.preheader
+outer.header: ; preds = %outer.latch, %entry
+ %storemerge1 = phi i32 [ 0, %entry ], [ %inc9, %outer.latch ]
+ br label %inner.header
-for.cond2.preheader: ; preds = %for.inc5, %for.cond2.preheader.lr.ph
- br label %for.cond2
-
-for.cond2: ; preds = %for.body3, %for.cond2.preheader
- %storemerge = phi i32 [ %add, %for.body3 ], [ 0, %for.cond2.preheader ]
+inner.header: ; preds = %inner.latch, %outer.header
+ %storemerge = phi i32 [ %add, %inner.latch ], [ 0, %outer.header ]
%cmp = icmp slt i32 %storemerge, 1
- br i1 %cmp, label %for.body3, label %for.inc5
+ br i1 %cmp, label %inner.latch, label %outer.latch
-for.body3: ; preds = %for.cond2
+inner.latch: ; preds = %inner.header
%tobool4 = icmp eq i32 %storemerge, 0
%add = add nsw i32 %storemerge, 1
- br i1 %tobool4, label %for.cond2, label %if.then
+ br i1 %tobool4, label %inner.header, label %exit
-if.then: ; preds = %for.body3
- store i32 %storemerge1, i32* @b, align 4
+exit: ; preds = %inner.latch
+ %storemerge1.lcssa = phi i32 [ %storemerge1, %inner.latch ]
+ store i32 %storemerge1.lcssa, i32* @b, align 4
ret void
-for.inc5: ; preds = %for.cond2
- br i1 undef, label %for.cond1.for.inc8_crit_edge, label %for.cond2.preheader
+outer.latch: ; preds = %inner.header
+ %inc9 = add nsw i32 %storemerge1, 1
+ br label %outer.header
+}
-for.cond1.for.inc8_crit_edge: ; preds = %for.inc5
- br label %for.inc8
+; This case is similar to the previous one, and has the same CFG.
+; The difference is that loop unrolling doesn't remove any LCSSA definition,
+; yet breaks LCSSA form for the outer loop. It happens because before unrolling
+; block inner.latch was inside outer loop (and consequently, didn't require
+; LCSSA definition for %x), but after unrolling it occurs out of the outer
+; loop, so we need to insert an LCSSA definition to keep LCSSA.
-for.inc8: ; preds = %for.cond1.for.inc8_crit_edge, %for.cond1thread-pre-split
- %inc9 = add nsw i32 %storemerge1, 1
- br label %for.cond1thread-pre-split
+; Function Attrs: nounwind uwtable
+define void @fn2() {
+entry:
+ br label %outer.header
+
+outer.header:
+ br label %inner.header
+
+inner.header:
+ %x = load i32, i32* undef, align 4
+ br i1 true, label %outer.latch, label %inner.latch
+
+inner.latch:
+ %inc6 = add nsw i32 %x, 1
+ store i32 %inc6, i32* undef, align 4
+ br i1 false, label %inner.header, label %exit
+
+exit:
+ ret void
+
+outer.latch:
+ br label %outer.header
}
diff --git a/test/Transforms/LoopUnroll/rebuild_lcssa.ll b/test/Transforms/LoopUnroll/rebuild_lcssa.ll
new file mode 100644
index 000000000000..49498492344a
--- /dev/null
+++ b/test/Transforms/LoopUnroll/rebuild_lcssa.ll
@@ -0,0 +1,119 @@
+; RUN: opt < %s -loop-unroll -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; This test shows how unrolling an inner loop could break LCSSA for an outer
+; loop, and there is no cheap way to recover it.
+;
+; In this case the inner loop, L3, is being unrolled. It only runs one
+; iteration, so unrolling basically means replacing
+; br i1 true, label %exit, label %L3_header
+; with
+; br label %exit
+;
+; However, this change messes up the loops structure: for instance, block
+; L3_body no longer belongs to L2. It becomes an exit block for L2, so LCSSA
+; phis for definitions in L2 should now be placed there. In particular, we need
+; to insert such a definition for %y1.
+
+; CHECK-LABEL: @foo1
+define void @foo1() {
+entry:
+ br label %L1_header
+
+L1_header:
+ br label %L2_header
+
+L2_header:
+ %y1 = phi i64 [ undef, %L1_header ], [ %x.lcssa, %L2_latch ]
+ br label %L3_header
+
+L3_header:
+ %y2 = phi i64 [ 0, %L3_latch ], [ %y1, %L2_header ]
+ %x = add i64 undef, -1
+ br i1 true, label %L2_latch, label %L3_body
+
+L2_latch:
+ %x.lcssa = phi i64 [ %x, %L3_header ]
+ br label %L2_header
+
+; CHECK: L3_body:
+; CHECK-NEXT: %y1.lcssa = phi i64 [ %y1, %L3_header ]
+L3_body:
+ store i64 %y1, i64* undef
+ br i1 false, label %L3_latch, label %L1_latch
+
+L3_latch:
+ br i1 true, label %exit, label %L3_header
+
+L1_latch:
+ %y.lcssa = phi i64 [ %y2, %L3_body ]
+ br label %L1_header
+
+exit:
+ ret void
+}
+
+; Additional tests for some corner cases.
+;
+; CHECK-LABEL: @foo2
+define void @foo2() {
+entry:
+ br label %L1_header
+
+L1_header:
+ br label %L2_header
+
+L2_header:
+ %a = phi i64 [ undef, %L1_header ], [ %dec_us, %L3_header ]
+ br label %L3_header
+
+L3_header:
+ %b = phi i64 [ 0, %L3_latch ], [ %a, %L2_header ]
+ %dec_us = add i64 undef, -1
+ br i1 true, label %L2_header, label %L3_break_to_L1
+
+; CHECK: L3_break_to_L1:
+; CHECK-NEXT: %a.lcssa = phi i64 [ %a, %L3_header ]
+L3_break_to_L1:
+ br i1 false, label %L3_latch, label %L1_latch
+
+L1_latch:
+ %b_lcssa = phi i64 [ %b, %L3_break_to_L1 ]
+ br label %L1_header
+
+L3_latch:
+ br i1 true, label %Exit, label %L3_header
+
+Exit:
+ ret void
+}
+
+; CHECK-LABEL: @foo3
+define void @foo3() {
+entry:
+ br label %L1_header
+
+L1_header:
+ %a = phi i8* [ %b, %L1_latch ], [ null, %entry ]
+ br i1 undef, label %L2_header, label %L1_latch
+
+L2_header:
+ br i1 undef, label %L2_latch, label %L1_latch
+
+; CHECK: L2_latch:
+; CHECK-NEXT: %a.lcssa = phi i8* [ %a, %L2_header ]
+L2_latch:
+ br i1 true, label %L2_exit, label %L2_header
+
+L1_latch:
+ %b = phi i8* [ undef, %L1_header ], [ null, %L2_header ]
+ br label %L1_header
+
+L2_exit:
+ %a_lcssa1 = phi i8* [ %a, %L2_latch ]
+ br label %Exit
+
+Exit:
+ %a_lcssa2 = phi i8* [ %a_lcssa1, %L2_exit ]
+ ret void
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll
index de61e847a5a7..dcf159a09a1d 100644
--- a/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -15,7 +15,7 @@
; CHECK-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
; CHECK-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
-define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly !dbg !6 {
entry:
%cmp1 = icmp eq i32 %n, 0, !dbg !7
br i1 %cmp1, label %for.end, label %for.body, !dbg !7
@@ -44,7 +44,7 @@ for.end: ; preds = %for.body, %entry
!3 = !{}
!4 = !DISubroutineType(types: !3)
!5 = !DIFile(filename: "test.cpp", directory: "/tmp")
-!6 = !DISubprogram(name: "test", scope: !5, file: !5, line: 99, type: !4, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32*, i32)* @test, variables: !3)
+!6 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 99, type: !4, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !3)
!7 = !DILocation(line: 100, column: 1, scope: !6)
!8 = !DILocation(line: 101, column: 1, scope: !6)
!9 = !DILocation(line: 102, column: 1, scope: !6)
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll
index 8e0d77513cc1..b915b4fdf489 100644
--- a/test/Transforms/LoopUnroll/unroll-pragmas.ll
+++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll
@@ -256,3 +256,69 @@ for.end: ; preds = %for.body
ret void
}
!12 = !{!12, !4}
+
+; #pragma clang loop unroll(enable)
+; Loop should be fully unrolled.
+;
+; CHECK-LABEL: @loop64_with_enable(
+; CHECK-NOT: br i1
+define void @loop64_with_enable(i32* nocapture %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13
+
+for.end: ; preds = %for.body
+ ret void
+}
+!13 = !{!13, !14}
+!14 = !{!"llvm.loop.unroll.enable"}
+
+; #pragma clang loop unroll(enable)
+; Loop has a runtime trip count and should be runtime unrolled and duplicated
+; (original and 8x).
+;
+; CHECK-LABEL: @runtime_loop_with_enable(
+; CHECK: for.body.prol:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
+; CHECK: for.body:
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK-NOT: store i32
+; CHECK: br i1
+define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
+entry:
+ %cmp3 = icmp sgt i32 %b, 0
+ br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %b
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+!15 = !{!15, !14}
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
index abeea209f639..a35596aff11c 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -34,7 +34,7 @@
; CHECK-NEXT: br label %loop_begin.us1
; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us5, %.split.split.us
-; CHECK-NEXT: %var_val.us.2 = load i32, i32* %var
+; CHECK-NEXT: %var_val.us2 = load i32, i32* %var
; CHECK-NEXT: switch i32 2, label %default.us-lcssa.us-lcssa.us [
; CHECK-NEXT: i32 1, label %inc.us4
; CHECK-NEXT: i32 2, label %dec.us3
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
index 02552ea5cc40..20f03c987eb7 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -65,7 +65,7 @@
; CHECK-NEXT: br label %loop_begin.us1
; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us6, %.split.split.us
-; CHECK-NEXT: %var_val.us.2 = load i32, i32* %var
+; CHECK-NEXT: %var_val.us2 = load i32, i32* %var
; CHECK-NEXT: switch i32 %c, label %second_switch.us3 [
; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge.us
; CHECK-NEXT: ]
diff --git a/test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll b/test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll
new file mode 100644
index 000000000000..0f74614da98e
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -loop-unswitch -S | FileCheck %s
+
+; In cases where two address spaces do not have the same size pointer, the
+; input for the addrspacecast should not be used as a substitute for itself
+; when manipulating the pointer.
+
+target datalayout = "e-m:e-p:16:16-p1:32:16-i32:16-i64:16-n8:16"
+
+define void @foo() {
+; CHECK-LABEL: @foo
+entry:
+ %arrayidx.i1 = getelementptr inbounds i16, i16* undef, i16 undef
+ %arrayidx.i = addrspacecast i16* %arrayidx.i1 to i16 addrspace(1)*
+ br i1 undef, label %for.body.i, label %bar.exit
+
+for.body.i: ; preds = %for.body.i, %entry
+; When we call makeLoopInvariant (i.e. trivial LICM) on this load, it
+; will try to find the base object to prove dereferenceability. If we look
+; through the addrspacecast, we'll fail an assertion about bitwidths matching.
+; CHECK-LABEL: for.body.i
+; CHECK: %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2
+ %0 = load i16, i16 addrspace(1)* %arrayidx.i, align 2
+ %cmp1.i = icmp eq i16 %0, 0
+ br i1 %cmp1.i, label %bar.exit, label %for.body.i
+
+bar.exit: ; preds = %for.body.i, %entry
+ ret void
+}
diff --git a/test/Transforms/LoopUnswitch/LIV-loop-condtion.ll b/test/Transforms/LoopUnswitch/LIV-loop-condtion.ll
new file mode 100644
index 000000000000..dd436f474ea0
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/LIV-loop-condtion.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -loop-unswitch -loop-unswitch-threshold=0 -S 2>&1 | FileCheck %s
+
+; This tests that trivial loop unswitching only happens when the trivial condition
+; itself is an LIV loop condition (not a partial LIV, which could occur in and/or).
+
+define i32 @test(i1 %cond1, i32 %var1) {
+entry:
+ br label %loop_begin
+
+loop_begin:
+ %var3 = phi i32 [%var1, %entry], [%var2, %do_something]
+ %cond2 = icmp eq i32 %var3, 10
+ %cond.and = and i1 %cond1, %cond2
+
+; %cond.and only has %cond1 as LIV so no unswitch should happen.
+; CHECK: br i1 %cond.and, label %do_something, label %loop_exit
+ br i1 %cond.and, label %do_something, label %loop_exit
+
+do_something:
+ %var2 = add i32 %var3, 1
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @some_func() noreturn \ No newline at end of file
diff --git a/test/Transforms/LoopUnswitch/basictest.ll b/test/Transforms/LoopUnswitch/basictest.ll
index e990144d5ccc..a02a463764dd 100644
--- a/test/Transforms/LoopUnswitch/basictest.ll
+++ b/test/Transforms/LoopUnswitch/basictest.ll
@@ -64,5 +64,44 @@ loop_exit:
; CHECK: }
}
+; This simple test would normally unswitch, but should be inhibited by the presence of
+; the convergent call that is not control-dependent on the unswitch condition.
+
+; CHECK-LABEL: @test3(
+define i32 @test3(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32, i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32, i32* %var
+
+; CHECK: call void @conv()
+; CHECK-NOT: call void @conv()
+ call void @conv() convergent
+
+ switch i32 %c, label %default [
+ i32 1, label %inc
+ i32 2, label %dec
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+dec:
+ call void @decf() noreturn nounwind
+ br label %loop_begin
+default:
+ br label %loop_exit
+loop_exit:
+ ret i32 0
+; CHECK: }
+}
+
+
declare void @incf() noreturn
declare void @decf() noreturn
+declare void @conv() convergent
diff --git a/test/Transforms/LoopUnswitch/cleanuppad.ll b/test/Transforms/LoopUnswitch/cleanuppad.ll
new file mode 100644
index 000000000000..b06ebd7235c8
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/cleanuppad.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S -loop-unswitch < %s | FileCheck %s
+target triple = "x86_64-pc-win32"
+
+define void @f(i32 %doit, i1 %x, i1 %y) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %tobool = icmp eq i32 %doit, 0
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ br i1 %x, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ br i1 %tobool, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ br i1 %y, label %for.inc, label %delete.notnull
+
+delete.notnull: ; preds = %if.then
+ invoke void @g()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %delete.notnull
+ br label %for.inc
+
+lpad: ; preds = %delete.notnull
+ %cp = cleanuppad within none []
+ cleanupret from %cp unwind to caller
+
+for.inc: ; preds = %invoke.cont, %if.then, %for.body
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+declare void @g()
+
+declare i32 @__CxxFrameHandler3(...)
+
+; CHECK-LABEL: define void @f(
+; CHECK: cleanuppad within none []
+; CHECK-NOT: cleanuppad
+
+attributes #0 = { ssp uwtable }
diff --git a/test/Transforms/LoopUnswitch/cold-loop.ll b/test/Transforms/LoopUnswitch/cold-loop.ll
new file mode 100644
index 000000000000..1fbc08038bbd
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/cold-loop.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -loop-unswitch -loop-unswitch-with-block-frequency -S 2>&1 | FileCheck %s
+
+;; trivial condition should be unswitched regardless of coldness.
+define i32 @test1(i1 %cond1, i1 %cond2) !prof !1 {
+ br i1 %cond1, label %loop_begin, label %loop_exit, !prof !0
+
+loop_begin:
+; CHECK: br i1 true, label %continue, label %loop_exit.loopexit
+ br i1 %cond2, label %continue, label %loop_exit ; trivial condition
+
+continue:
+ call void @some_func1() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+;; cold non-trivial condition should not be unswitched.
+define i32 @test2(i32* %var, i1 %cond1, i1 %cond2) !prof !1 {
+ br i1 %cond1, label %loop_begin, label %loop_exit, !prof !0
+
+loop_begin:
+ store i32 1, i32* %var
+; CHECK: br i1 %cond2, label %continue1, label %continue2
+ br i1 %cond2, label %continue1, label %continue2 ; non-trivial condition
+
+continue1:
+ call void @some_func1() noreturn nounwind
+ br label %joint
+
+continue2:
+ call void @some_func2() noreturn nounwind
+ br label %joint
+
+joint:
+;; unswitching will duplicate these calls.
+ call void @some_func3() noreturn nounwind
+ call void @some_func4() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @some_func1() noreturn
+declare void @some_func2() noreturn
+declare void @some_func3() noreturn
+declare void @some_func4() noreturn
+
+!0 = !{!"branch_weights", i32 1, i32 100000000}
+!1 = !{!"function_entry_count", i64 100}
diff --git a/test/Transforms/LoopUnswitch/copy-metadata.ll b/test/Transforms/LoopUnswitch/copy-metadata.ll
new file mode 100644
index 000000000000..2a634c25a23d
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/copy-metadata.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -loop-unswitch -S < %s 2>&1 | FileCheck %s
+
+; This test checks that the unswitched condition preserves make.implicit metadata.
+
+define i32 @test(i1 %cond) {
+; CHECK: br i1 %cond, label %..split_crit_edge, label %.loop_exit.split_crit_edge, !make.implicit !0
+ br label %loop_begin
+
+loop_begin:
+; CHECK: br i1 true, label %continue, label %loop_exit, !make.implicit !0
+ br i1 %cond, label %continue, label %loop_exit, !make.implicit !0
+
+continue:
+ call void @some_func()
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @some_func()
+
+!0 = !{}
diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll
index e79d874d9ca6..3d1c895edec9 100644
--- a/test/Transforms/LoopUnswitch/infinite-loop.ll
+++ b/test/Transforms/LoopUnswitch/infinite-loop.ll
@@ -9,23 +9,23 @@
; It can trivially unswitch on the false cas of condition %a though.
; STATS: 2 loop-unswitch - Number of branches unswitched
-; STATS: 1 loop-unswitch - Number of unswitches that are trivial
+; STATS: 2 loop-unswitch - Number of unswitches that are trivial
; CHECK-LABEL: @func_16(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split
; CHECK: entry.split:
-; CHECK-NEXT: br i1 %b, label %cond.end.us, label %abort1
+; CHECK-NEXT: br i1 %b, label %cond.end, label %abort1.split
-; CHECK: cond.end.us:
-; CHECK-NEXT: br label %cond.end.us
+; CHECK: cond.end:
+; CHECK-NEXT: br label %cond.end
; CHECK: abort0.split:
; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]
; CHECK-NEXT: unreachable
-; CHECK: abort1:
+; CHECK: abort1.split:
; CHECK-NEXT: call void @end1() [[NOR_NUW]]
; CHECK-NEXT: unreachable
diff --git a/test/Transforms/LoopUnswitch/trivial-unswitch.ll b/test/Transforms/LoopUnswitch/trivial-unswitch.ll
new file mode 100644
index 000000000000..db3328278dae
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/trivial-unswitch.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -loop-unswitch -loop-unswitch-threshold=0 -verify-loop-info -S < %s 2>&1 | FileCheck %s
+
+; This test contains two trivial unswitch conditions in one loop.
+; LoopUnswitch pass should be able to unswitch the second one
+; after unswitching the first one.
+
+
+; CHECK: br i1 %cond1, label %..split_crit_edge, label %.loop_exit.split_crit_edge
+
+; CHECK: ..split_crit_edge: ; preds = %0
+; CHECK: br label %.split
+
+; CHECK: .split: ; preds = %..split_crit_edge
+; CHECK: br i1 %cond2, label %.split..split.split_crit_edge, label %.split.loop_exit.split1_crit_edge
+
+; CHECK: .split..split.split_crit_edge: ; preds = %.split
+; CHECK: br label %.split.split
+
+; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
+; CHECK: br label %loop_begin
+
+; CHECK: loop_begin: ; preds = %do_something, %.split.split
+; CHECK: br i1 true, label %continue, label %loop_exit
+
+; CHECK: continue: ; preds = %loop_begin
+; CHECK: %var_val = load i32, i32* %var
+; CHECK: br i1 true, label %do_something, label %loop_exit
+
+define i32 @test(i32* %var, i1 %cond1, i1 %cond2) {
+ br label %loop_begin
+
+loop_begin:
+ br i1 %cond1, label %continue, label %loop_exit ; first trivial condition
+
+continue:
+ %var_val = load i32, i32* %var
+ br i1 %cond2, label %do_something, label %loop_exit ; second trivial condition
+
+do_something:
+ call void @some_func() noreturn nounwind
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @some_func() noreturn \ No newline at end of file
diff --git a/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll b/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
index f16ee4171da9..58315a73ec13 100644
--- a/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
+++ b/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses=true | FileCheck %s
-; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -enable-interleaved-mem-accesses=true | FileCheck %s --check-prefix=FORCE-VEC
+; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 | FileCheck %s
+; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s --check-prefix=FORCE-VEC
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnueabi"
diff --git a/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
new file mode 100644
index 000000000000..65f5c4e6266b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
@@ -0,0 +1,54 @@
+; RUN: opt -S < %s -loop-vectorize -instcombine 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+;; See https://llvm.org/bugs/show_bug.cgi?id=25490
+;; Due to the data structures used, the LLVM IR was not deterministic.
+;; This test comes from the PR.
+
+;; CHECK-LABEL: @test(
+; CHECK: load <16 x i8>
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: load <16 x i8>
+; CHECK-NEXT: zext <16 x i8>
+; CHECK-NEXT: zext <16 x i8>
+define void @test(i32 %n, i8* nocapture %a, i8* nocapture %b, i8* nocapture readonly %c) {
+entry:
+ %cmp.28 = icmp eq i32 %n, 0
+ br i1 %cmp.28, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i8, i8* %c, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1
+ %conv3 = zext i8 %1 to i32
+ %mul = mul nuw nsw i32 %conv3, %conv
+ %shr.26 = lshr i32 %mul, 8
+ %conv4 = trunc i32 %shr.26 to i8
+ store i8 %conv4, i8* %arrayidx2, align 1
+ %arrayidx8 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv
+ %2 = load i8, i8* %arrayidx8, align 1
+ %conv9 = zext i8 %2 to i32
+ %mul10 = mul nuw nsw i32 %conv9, %conv
+ %shr11.27 = lshr i32 %mul10, 8
+ %conv12 = trunc i32 %shr11.27 to i8
+ store i8 %conv12, i8* %arrayidx8, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
diff --git a/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
new file mode 100644
index 000000000000..a0e741a3cdbe
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -debug-only=loop-vectorize -loop-vectorize -instcombine < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnueabi"
+
+@AB = common global [1024 x i8] zeroinitializer, align 4
+@CD = common global [1024 x i8] zeroinitializer, align 4
+
+define void @test_byte_interleaved_cost(i8 %C, i8 %D) {
+entry:
+ br label %for.body
+
+; 8xi8 and 16xi8 are valid i8 vector types, so the cost of the interleaved
+; access group is 2.
+
+; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %tmp = load i8, i8* %arrayidx0, align 4
+; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %tmp = load i8, i8* %arrayidx0, align 4
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @AB, i64 0, i64 %indvars.iv
+ %tmp = load i8, i8* %arrayidx0, align 4
+ %tmp1 = or i64 %indvars.iv, 1
+ %arrayidx1 = getelementptr inbounds [1024 x i8], [1024 x i8]* @AB, i64 0, i64 %tmp1
+ %tmp2 = load i8, i8* %arrayidx1, align 4
+ %add = add nsw i8 %tmp, %C
+ %mul = mul nsw i8 %tmp2, %D
+ %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @CD, i64 0, i64 %indvars.iv
+ store i8 %add, i8* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @CD, i64 0, i64 %tmp1
+ store i8 %mul, i8* %arrayidx3, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp slt i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
new file mode 100644
index 000000000000..eee310491805
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -0,0 +1,243 @@
+; RUN: opt -S < %s -basicaa -loop-vectorize -force-vector-interleave=1 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; CHECK-LABEL: @add_a(
+; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: add nuw nsw <16 x i8>
+; CHECK: store <16 x i8>
+; Function Attrs: nounwind
+define void @add_a(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
+entry:
+ %cmp8 = icmp sgt i32 %len, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx
+ %conv = zext i8 %0 to i32
+ %add = add nuw nsw i32 %conv, 2
+ %conv1 = trunc i32 %add to i8
+ %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ store i8 %conv1, i8* %arrayidx3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_b(
+; CHECK: load <8 x i16>, <8 x i16>*
+; CHECK: add nuw nsw <8 x i16>
+; CHECK: store <8 x i16>
+; Function Attrs: nounwind
+define void @add_b(i16* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
+entry:
+ %cmp9 = icmp sgt i32 %len, 0
+ br i1 %cmp9, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
+ %0 = load i16, i16* %arrayidx
+ %conv8 = zext i16 %0 to i32
+ %add = add nuw nsw i32 %conv8, 2
+ %conv1 = trunc i32 %add to i16
+ %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv
+ store i16 %conv1, i16* %arrayidx3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_c(
+; CHECK: load <8 x i8>, <8 x i8>*
+; CHECK: add nuw nsw <8 x i16>
+; CHECK: store <8 x i16>
+; Function Attrs: nounwind
+define void @add_c(i8* noalias nocapture readonly %p, i16* noalias nocapture %q, i32 %len) #0 {
+entry:
+ %cmp8 = icmp sgt i32 %len, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx
+ %conv = zext i8 %0 to i32
+ %add = add nuw nsw i32 %conv, 2
+ %conv1 = trunc i32 %add to i16
+ %arrayidx3 = getelementptr inbounds i16, i16* %q, i64 %indvars.iv
+ store i16 %conv1, i16* %arrayidx3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_d(
+; CHECK: load <4 x i16>
+; CHECK: add nsw <4 x i32>
+; CHECK: store <4 x i32>
+define void @add_d(i16* noalias nocapture readonly %p, i32* noalias nocapture %q, i32 %len) #0 {
+entry:
+ %cmp7 = icmp sgt i32 %len, 0
+ br i1 %cmp7, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
+ %0 = load i16, i16* %arrayidx
+ %conv = sext i16 %0 to i32
+ %add = add nsw i32 %conv, 2
+ %arrayidx2 = getelementptr inbounds i32, i32* %q, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_e(
+; CHECK: load <16 x i8>
+; CHECK: shl <16 x i8>
+; CHECK: add nuw nsw <16 x i8>
+; CHECK: or <16 x i8>
+; CHECK: mul nuw nsw <16 x i8>
+; CHECK: and <16 x i8>
+; CHECK: xor <16 x i8>
+; CHECK: mul nuw nsw <16 x i8>
+; CHECK: store <16 x i8>
+define void @add_e(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
+entry:
+ %cmp.32 = icmp sgt i32 %len, 0
+ br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ %conv11 = zext i8 %arg2 to i32
+ %conv13 = zext i8 %arg1 to i32
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx
+ %conv = zext i8 %0 to i32
+ %add = shl i32 %conv, 4
+ %conv2 = add nuw nsw i32 %add, 32
+ %or = or i32 %conv, 51
+ %mul = mul nuw nsw i32 %or, 60
+ %and = and i32 %conv2, %conv13
+ %mul.masked = and i32 %mul, 252
+ %conv17 = xor i32 %mul.masked, %conv11
+ %mul18 = mul nuw nsw i32 %conv17, %and
+ %conv19 = trunc i32 %mul18 to i8
+ %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ store i8 %conv19, i8* %arrayidx21
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_f
+; CHECK: load <8 x i16>
+; CHECK: trunc <8 x i16>
+; CHECK: shl <8 x i8>
+; CHECK: add nsw <8 x i8>
+; CHECK: or <8 x i8>
+; CHECK: mul nuw nsw <8 x i8>
+; CHECK: and <8 x i8>
+; CHECK: xor <8 x i8>
+; CHECK: mul nuw nsw <8 x i8>
+; CHECK: store <8 x i8>
+define void @add_f(i16* noalias nocapture readonly %p, i8* noalias nocapture %q, i8 %arg1, i8 %arg2, i32 %len) #0 {
+entry:
+ %cmp.32 = icmp sgt i32 %len, 0
+ br i1 %cmp.32, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ %conv11 = zext i8 %arg2 to i32
+ %conv13 = zext i8 %arg1 to i32
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i16, i16* %p, i64 %indvars.iv
+ %0 = load i16, i16* %arrayidx
+ %conv = sext i16 %0 to i32
+ %add = shl i32 %conv, 4
+ %conv2 = add nsw i32 %add, 32
+ %or = and i32 %conv, 204
+ %conv8 = or i32 %or, 51
+ %mul = mul nuw nsw i32 %conv8, 60
+ %and = and i32 %conv2, %conv13
+ %mul.masked = and i32 %mul, 252
+ %conv17 = xor i32 %mul.masked, %conv11
+ %mul18 = mul nuw nsw i32 %conv17, %and
+ %conv19 = trunc i32 %mul18 to i8
+ %arrayidx21 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ store i8 %conv19, i8* %arrayidx21
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_g
+; CHECK: load <16 x i8>
+; CHECK: xor <16 x i8>
+; CHECK: icmp ult <16 x i8>
+; CHECK: select <16 x i1> {{.*}}, <16 x i8>
+; CHECK: store <16 x i8>
+define void @add_g(i8* noalias nocapture readonly %p, i8* noalias nocapture readonly %q, i8* noalias nocapture %r, i8 %arg1, i32 %len) #0 {
+ %1 = icmp sgt i32 %len, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0
+ %2 = sext i8 %arg1 to i64
+ br label %3
+
+._crit_edge: ; preds = %3, %0
+ ret void
+
+; <label>:3 ; preds = %3, %.lr.ph
+ %indvars.iv = phi i64 [ 0, %.lr.ph ], [ %indvars.iv.next, %3 ]
+ %x4 = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %x5 = load i8, i8* %x4
+ %x7 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ %x8 = load i8, i8* %x7
+ %x9 = zext i8 %x5 to i32
+ %x10 = xor i32 %x9, 255
+ %x11 = icmp ult i32 %x10, 24
+ %x12 = select i1 %x11, i32 %x10, i32 24
+ %x13 = trunc i32 %x12 to i8
+ store i8 %x13, i8* %x4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %._crit_edge, label %3
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll b/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
new file mode 100644
index 000000000000..be08a63b212c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
@@ -0,0 +1,191 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: @reduction_i8
+;
+; char reduction_i8(char *a, char *b, int n) {
+; char sum = 0;
+; for (int i = 0; i < n; ++i)
+; sum += (a[i] + b[i]);
+; return sum;
+; }
+;
+; CHECK: vector.body:
+; CHECK: phi <16 x i8>
+; CHECK: load <16 x i8>
+; CHECK: load <16 x i8>
+; CHECK: add <16 x i8>
+; CHECK: add <16 x i8>
+;
+; CHECK: middle.block:
+; CHECK: shufflevector <16 x i8>
+; CHECK: add <16 x i8>
+; CHECK: shufflevector <16 x i8>
+; CHECK: add <16 x i8>
+; CHECK: shufflevector <16 x i8>
+; CHECK: add <16 x i8>
+; CHECK: shufflevector <16 x i8>
+; CHECK: add <16 x i8>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <16 x i8>
+; CHECK: zext i8 [[Rdx]] to i32
+;
+define i8 @reduction_i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
+entry:
+ %cmp.12 = icmp sgt i32 %n, 0
+ br i1 %cmp.12, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge:
+ %add5.lcssa = phi i32 [ %add5, %for.body ]
+ %conv6 = trunc i32 %add5.lcssa to i8
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %sum.0.lcssa = phi i8 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ]
+ ret i8 %sum.0.lcssa
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %sum.013 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1
+ %conv3 = zext i8 %1 to i32
+ %conv4 = and i32 %sum.013, 255
+ %add = add nuw nsw i32 %conv, %conv4
+ %add5 = add nuw nsw i32 %add, %conv3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
+}
+
+; CHECK-LABEL: @reduction_i16_1
+;
+; short reduction_i16_1(short *a, short *b, int n) {
+; short sum = 0;
+; for (int i = 0; i < n; ++i)
+; sum += (a[i] + b[i]);
+; return sum;
+; }
+;
+; CHECK: vector.body:
+; CHECK: phi <8 x i16>
+; CHECK: load <8 x i16>
+; CHECK: load <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: add <8 x i16>
+;
+; CHECK: middle.block:
+; CHECK: shufflevector <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: shufflevector <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: shufflevector <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <8 x i16>
+; CHECK: zext i16 [[Rdx]] to i32
+;
+define i16 @reduction_i16_1(i16* nocapture readonly %a, i16* nocapture readonly %b, i32 %n) {
+entry:
+ %cmp.16 = icmp sgt i32 %n, 0
+ br i1 %cmp.16, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge:
+ %add5.lcssa = phi i32 [ %add5, %for.body ]
+ %conv6 = trunc i32 %add5.lcssa to i16
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %sum.0.lcssa = phi i16 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ]
+ ret i16 %sum.0.lcssa
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %sum.017 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 %indvars.iv
+ %0 = load i16, i16* %arrayidx, align 2
+ %conv.14 = zext i16 %0 to i32
+ %arrayidx2 = getelementptr inbounds i16, i16* %b, i64 %indvars.iv
+ %1 = load i16, i16* %arrayidx2, align 2
+ %conv3.15 = zext i16 %1 to i32
+ %conv4.13 = and i32 %sum.017, 65535
+ %add = add nuw nsw i32 %conv.14, %conv4.13
+ %add5 = add nuw nsw i32 %add, %conv3.15
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
+}
+
+; CHECK-LABEL: @reduction_i16_2
+;
+; short reduction_i16_2(char *a, char *b, int n) {
+; short sum = 0;
+; for (int i = 0; i < n; ++i)
+; sum += (a[i] + b[i]);
+; return sum;
+; }
+;
+; CHECK: vector.body:
+; CHECK: phi <8 x i16>
+; CHECK: [[Ld1:%[a-zA-Z0-9.]+]] = load <8 x i8>
+; CHECK: zext <8 x i8> [[Ld1]] to <8 x i16>
+; CHECK: [[Ld2:%[a-zA-Z0-9.]+]] = load <8 x i8>
+; CHECK: zext <8 x i8> [[Ld2]] to <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: add <8 x i16>
+;
+; CHECK: middle.block:
+; CHECK: shufflevector <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: shufflevector <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: shufflevector <8 x i16>
+; CHECK: add <8 x i16>
+; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = extractelement <8 x i16>
+; CHECK: zext i16 [[Rdx]] to i32
+;
+define i16 @reduction_i16_2(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %n) {
+entry:
+ %cmp.14 = icmp sgt i32 %n, 0
+ br i1 %cmp.14, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge:
+ %add5.lcssa = phi i32 [ %add5, %for.body ]
+ %conv6 = trunc i32 %add5.lcssa to i16
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %sum.0.lcssa = phi i16 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ]
+ ret i16 %sum.0.lcssa
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %sum.015 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1
+ %conv3 = zext i8 %1 to i32
+ %conv4.13 = and i32 %sum.015, 65535
+ %add = add nuw nsw i32 %conv, %conv4.13
+ %add5 = add nuw nsw i32 %add, %conv3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
+}
diff --git a/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll b/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
new file mode 100644
index 000000000000..de3626b57d83
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -debug-only=loop-vectorize -loop-vectorize -instcombine < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "armv8--linux-gnueabihf"
+
+@AB = common global [1024 x i8] zeroinitializer, align 4
+@CD = common global [1024 x i8] zeroinitializer, align 4
+
+define void @test_byte_interleaved_cost(i8 %C, i8 %D) {
+entry:
+ br label %for.body
+
+; 8xi8 and 16xi8 are valid i8 vector types, so the cost of the interleaved
+; access group is 2.
+
+; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %tmp = load i8, i8* %arrayidx0, align 4
+; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %tmp = load i8, i8* %arrayidx0, align 4
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @AB, i64 0, i64 %indvars.iv
+ %tmp = load i8, i8* %arrayidx0, align 4
+ %tmp1 = or i64 %indvars.iv, 1
+ %arrayidx1 = getelementptr inbounds [1024 x i8], [1024 x i8]* @AB, i64 0, i64 %tmp1
+ %tmp2 = load i8, i8* %arrayidx1, align 4
+ %add = add nsw i8 %tmp, %C
+ %mul = mul nsw i8 %tmp2, %D
+ %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @CD, i64 0, i64 %indvars.iv
+ store i8 %add, i8* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @CD, i64 0, i64 %tmp1
+ store i8 %mul, i8* %arrayidx3, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp slt i64 %indvars.iv.next, 1024
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/ARM/vector_cast.ll b/test/Transforms/LoopVectorize/ARM/vector_cast.ll
new file mode 100644
index 000000000000..78af9960e064
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/vector_cast.ll
@@ -0,0 +1,37 @@
+; RUN: opt -loop-vectorize -tbaa -S -mattr=+neon < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabi"
+
+; This requires the loop vectorizer to create an interleaved access group
+; for the stores to the struct. Here we need to perform a bitcast from a vector
+; of pointers to a vector of i32s.
+
+%class.A = type { i8*, i32 }
+
+; CHECK-LABEL: test0
+define void @test0(%class.A* %StartPtr, %class.A* %APtr) {
+entry:
+ br label %for.body.i
+
+for.body.i:
+ %addr = phi %class.A* [ %StartPtr, %entry ], [ %incdec.ptr.i, %for.body.i ]
+ %Data.i.i = getelementptr inbounds %class.A, %class.A* %addr, i32 0, i32 0
+ store i8* null, i8** %Data.i.i, align 4, !tbaa !8
+ %Length.i.i = getelementptr inbounds %class.A, %class.A* %addr, i32 0, i32 1
+ store i32 0, i32* %Length.i.i, align 4, !tbaa !11
+ %incdec.ptr.i = getelementptr inbounds %class.A, %class.A* %addr, i32 1
+ %cmp.i = icmp eq %class.A* %incdec.ptr.i, %APtr
+ br i1 %cmp.i, label %exit, label %for.body.i
+
+exit:
+ ret void
+}
+
+!5 = !{!"any pointer", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
+!8 = !{!9, !5, i64 0}
+!9 = !{!5, i64 0, !10, i64 4}
+!10 = !{!"int", !6, i64 0}
+!11 = !{!9, !10, i64 4}
diff --git a/test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll b/test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll
new file mode 100644
index 000000000000..3491e08bbaa2
--- /dev/null
+++ b/test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -basicaa -loop-vectorize < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %c) #0 {
+entry:
+ br label %for.body
+
+; CHECK-LABEL: @foo
+; CHECK: fmul <4 x double> %{{[^,]+}}, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: fmul <4 x double> %{{[^,]+}}, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+ %0 = load double, double* %arrayidx, align 8
+ %mul = fmul double %0, 2.000000e+00
+ %mul3 = fmul double %0, %mul
+ %arrayidx5 = getelementptr inbounds double, double* %c, i64 %indvars.iv
+ %1 = load double, double* %arrayidx5, align 8
+ %mul6 = fmul double %1, 3.000000e+00
+ %mul9 = fmul double %1, %mul6
+ %add = fadd double %mul3, %mul9
+ %mul12 = fmul double %0, 4.000000e+00
+ %mul15 = fmul double %mul12, %1
+ %add16 = fadd double %mul15, %add
+ %add17 = fadd double %add16, 1.000000e+00
+ %arrayidx19 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+ store double %add17, double* %arrayidx19, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+
diff --git a/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll b/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
new file mode 100644
index 000000000000..0cb845520246
--- /dev/null
+++ b/test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -basicaa -loop-vectorize < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
+entry:
+ br label %for.body
+
+; CHECK-LABEL: @foo
+; CHECK: <2 x double>
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %0
+ %1 = load double, double* %arrayidx, align 8
+ %add = fadd double %1, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+attributes #0 = { nounwind "target-cpu"="pwr8" }
+
diff --git a/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index 8c375ccfd315..abe7d6de3f35 100644
--- a/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -499,4 +499,146 @@ for.end: ; preds = %for.cond
ret void
}
+; void foo7 (double * __restrict__ out, double ** __restrict__ in,
+; bool * __restrict__ trigger, unsigned size) {
+;
+; for (unsigned i=0; i<size; i++)
+; if (trigger[i] && (in[i] != 0))
+; out[i] = (double) 0.5;
+; }
+
+;AVX512-LABEL: @foo7
+;AVX512: call <8 x double*> @llvm.masked.load.v8p0f64(<8 x double*>*
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+define void @foo7(double* noalias %out, double** noalias %in, i8* noalias %trigger, i32 %size) #0 {
+entry:
+ %out.addr = alloca double*, align 8
+ %in.addr = alloca double**, align 8
+ %trigger.addr = alloca i8*, align 8
+ %size.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store double* %out, double** %out.addr, align 8
+ store double** %in, double*** %in.addr, align 8
+ store i8* %trigger, i8** %trigger.addr, align 8
+ store i32 %size, i32* %size.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %size.addr, align 4
+ %cmp = icmp ult i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %i, align 4
+ %idxprom = zext i32 %2 to i64
+ %3 = load i8*, i8** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %3, i64 %idxprom
+ %4 = load i8, i8* %arrayidx, align 1
+ %tobool = trunc i8 %4 to i1
+ br i1 %tobool, label %land.lhs.true, label %if.end
+
+land.lhs.true: ; preds = %for.body
+ %5 = load i32, i32* %i, align 4
+ %idxprom1 = zext i32 %5 to i64
+ %6 = load double**, double*** %in.addr, align 8
+ %arrayidx2 = getelementptr inbounds double*, double** %6, i64 %idxprom1
+ %7 = load double*, double** %arrayidx2, align 8
+ %cmp3 = icmp ne double* %7, null
+ br i1 %cmp3, label %if.then, label %if.end
+
+if.then: ; preds = %land.lhs.true
+ %8 = load i32, i32* %i, align 4
+ %idxprom4 = zext i32 %8 to i64
+ %9 = load double*, double** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %9, i64 %idxprom4
+ store double 5.000000e-01, double* %arrayidx5, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then, %land.lhs.true, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %10 = load i32, i32* %i, align 4
+ %inc = add i32 %10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+;typedef int (*fp)();
+;void foo8 (double* __restrict__ out, fp* __restrict__ in, bool * __restrict__ trigger, unsigned size) {
+;
+; for (unsigned i=0; i<size; i++)
+; if (trigger[i] && (in[i] != 0))
+; out[i] = (double) 0.5;
+;}
+
+;AVX512-LABEL: @foo8
+;AVX512: call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f(<8 x i32 ()*>* %
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+define void @foo8(double* noalias %out, i32 ()** noalias %in, i8* noalias %trigger, i32 %size) #0 {
+entry:
+ %out.addr = alloca double*, align 8
+ %in.addr = alloca i32 ()**, align 8
+ %trigger.addr = alloca i8*, align 8
+ %size.addr = alloca i32, align 4
+ %i = alloca i32, align 4
+ store double* %out, double** %out.addr, align 8
+ store i32 ()** %in, i32 ()*** %in.addr, align 8
+ store i8* %trigger, i8** %trigger.addr, align 8
+ store i32 %size, i32* %size.addr, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %size.addr, align 4
+ %cmp = icmp ult i32 %0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %2 = load i32, i32* %i, align 4
+ %idxprom = zext i32 %2 to i64
+ %3 = load i8*, i8** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %3, i64 %idxprom
+ %4 = load i8, i8* %arrayidx, align 1
+ %tobool = trunc i8 %4 to i1
+ br i1 %tobool, label %land.lhs.true, label %if.end
+
+land.lhs.true: ; preds = %for.body
+ %5 = load i32, i32* %i, align 4
+ %idxprom1 = zext i32 %5 to i64
+ %6 = load i32 ()**, i32 ()*** %in.addr, align 8
+ %arrayidx2 = getelementptr inbounds i32 ()*, i32 ()** %6, i64 %idxprom1
+ %7 = load i32 ()*, i32 ()** %arrayidx2, align 8
+ %cmp3 = icmp ne i32 ()* %7, null
+ br i1 %cmp3, label %if.then, label %if.end
+
+if.then: ; preds = %land.lhs.true
+ %8 = load i32, i32* %i, align 4
+ %idxprom4 = zext i32 %8 to i64
+ %9 = load double*, double** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %9, i64 %idxprom4
+ store double 5.000000e-01, double* %arrayidx5, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then, %land.lhs.true, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %10 = load i32, i32* %i, align 4
+ %inc = add i32 %10, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index ba8e11e58749..74c0c16086fe 100644
--- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -60,7 +60,7 @@ for.body: ; preds = %for.body, %entry
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 32
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
for.end: ; preds = %for.body
@@ -111,7 +111,7 @@ for.body: ; preds = %for.body, %entry
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 32
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
@@ -162,7 +162,7 @@ for.body: ; preds = %for.body, %entry
%arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %add, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 32
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
for.end: ; preds = %for.body
diff --git a/test/Transforms/LoopVectorize/X86/no_fpmath.ll b/test/Transforms/LoopVectorize/X86/no_fpmath.ll
new file mode 100644
index 000000000000..0bb78ce177fe
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/no_fpmath.ll
@@ -0,0 +1,104 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; CHECK: remark: no_fpmath.c:6:11: loop not vectorized: cannot prove it is safe to reorder floating-point operations
+; CHECK: remark: no_fpmath.c:6:14: loop not vectorized:
+; CHECK: remark: no_fpmath.c:17:14: vectorized loop (vectorization width: 2, interleaved count: 2)
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Function Attrs: nounwind readonly ssp uwtable
+define double @cond_sum(i32* nocapture readonly %v, i32 %n) #0 !dbg !4 {
+entry:
+ %cmp.7 = icmp sgt i32 %n, 0, !dbg !3
+ br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !8
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !9
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ %add.lcssa = phi double [ %add, %for.body ]
+ br label %for.cond.cleanup, !dbg !10
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ %a.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ]
+ ret double %a.0.lcssa, !dbg !10
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %a.08 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %v, i64 %indvars.iv, !dbg !9
+ %0 = load i32, i32* %arrayidx, align 4, !dbg !9, !tbaa !11
+ %cmp1 = icmp eq i32 %0, 0, !dbg !15
+ %cond = select i1 %cmp1, double 3.400000e+00, double 1.150000e+00, !dbg !9
+ %add = fadd double %a.08, %cond, !dbg !16
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !8
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !8
+ %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !8
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8, !llvm.loop !17
+}
+
+; Function Attrs: nounwind readonly ssp uwtable
+define double @cond_sum_loop_hint(i32* nocapture readonly %v, i32 %n) #0 !dbg !20 {
+entry:
+ %cmp.7 = icmp sgt i32 %n, 0, !dbg !19
+ br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !21
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !22
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ %add.lcssa = phi double [ %add, %for.body ]
+ br label %for.cond.cleanup, !dbg !23
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ %a.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ]
+ ret double %a.0.lcssa, !dbg !23
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %a.08 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %v, i64 %indvars.iv, !dbg !22
+ %0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !11
+ %cmp1 = icmp eq i32 %0, 0, !dbg !24
+ %cond = select i1 %cmp1, double 3.400000e+00, double 1.150000e+00, !dbg !22
+ %add = fadd double %a.08, %cond, !dbg !25
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !21
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21
+ %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !21
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !21, !llvm.loop !26
+}
+
+attributes #0 = { nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !{i32 1, !"PIC Level", i32 2}
+!2 = !{!"clang version 3.7.0"}
+!3 = !DILocation(line: 5, column: 20, scope: !4)
+!4 = distinct !DISubprogram(name: "cond_sum", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !7)
+!5 = !DIFile(filename: "no_fpmath.c", directory: "")
+!6 = !DISubroutineType(types: !7)
+!7 = !{}
+!8 = !DILocation(line: 5, column: 3, scope: !4)
+!9 = !DILocation(line: 6, column: 14, scope: !4)
+!10 = !DILocation(line: 9, column: 3, scope: !4)
+!11 = !{!12, !12, i64 0}
+!12 = !{!"int", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !DILocation(line: 6, column: 19, scope: !4)
+!16 = !DILocation(line: 6, column: 11, scope: !4)
+!17 = distinct !{!17, !18}
+!18 = !{!"llvm.loop.unroll.disable"}
+!19 = !DILocation(line: 16, column: 20, scope: !20)
+!20 = distinct !DISubprogram(name: "cond_sum_loop_hint", scope: !5, file: !5, line: 12, type: !6, isLocal: false, isDefinition: true, scopeLine: 12, flags: DIFlagPrototyped, isOptimized: true, variables: !7)
+!21 = !DILocation(line: 16, column: 3, scope: !20)
+!22 = !DILocation(line: 17, column: 14, scope: !20)
+!23 = !DILocation(line: 20, column: 3, scope: !20)
+!24 = !DILocation(line: 17, column: 19, scope: !20)
+!25 = !DILocation(line: 17, column: 11, scope: !20)
+!26 = distinct !{!26, !27, !18}
+!27 = !{!"llvm.loop.vectorize.enable", i1 true}
diff --git a/test/Transforms/LoopVectorize/X86/powof2div.ll b/test/Transforms/LoopVectorize/X86/powof2div.ll
index 6bc738a7d143..3e4bef6d4d07 100644
--- a/test/Transforms/LoopVectorize/X86/powof2div.ll
+++ b/test/Transforms/LoopVectorize/X86/powof2div.ll
@@ -6,10 +6,10 @@ target triple = "x86_64-unknown-linux-gnu"
@Foo = common global %struct.anon zeroinitializer, align 4
-;CHECK-LABEL: @foo(
-;CHECK: load <4 x i32>, <4 x i32>*
-;CHECK: sdiv <4 x i32>
-;CHECK: store <4 x i32>
+; CHECK-LABEL: @foo(
+; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: sdiv <4 x i32>
+; CHECK: store <4 x i32>
define void @foo(){
entry:
diff --git a/test/Transforms/LoopVectorize/X86/reduction-crash.ll b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
index 3741b95d9859..6393002d5071 100644
--- a/test/Transforms/LoopVectorize/X86/reduction-crash.ll
+++ b/test/Transforms/LoopVectorize/X86/reduction-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -mcpu=prescott < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -mcpu=prescott -disable-basicaa < %s | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
target triple = "i386-apple-darwin"
diff --git a/test/Transforms/LoopVectorize/X86/reg-usage.ll b/test/Transforms/LoopVectorize/X86/reg-usage.ll
new file mode 100644
index 000000000000..47a6e1029eda
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/reg-usage.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = global [1024 x i8] zeroinitializer, align 16
+@b = global [1024 x i8] zeroinitializer, align 16
+
+define i32 @foo() {
+; This function contains a sum-of-absolute-differences (SAD) loop. Check that
+; the register usage does not exceed 16 when VF = 16.
+;
+; CHECK-LABEL: foo
+; CHECK: LV(REG): VF = 4
+; CHECK-NEXT: LV(REG): Found max usage: 4
+; CHECK: LV(REG): VF = 8
+; CHECK-NEXT: LV(REG): Found max usage: 7
+; CHECK: LV(REG): VF = 16
+; CHECK-NEXT: LV(REG): Found max usage: 13
+
+entry:
+ br label %for.body
+
+for.cond.cleanup:
+ %add.lcssa = phi i32 [ %add, %for.body ]
+ ret i32 %add.lcssa
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1
+ %conv3 = zext i8 %1 to i32
+ %sub = sub nsw i32 %conv, %conv3
+ %ispos = icmp sgt i32 %sub, -1
+ %neg = sub nsw i32 0, %sub
+ %2 = select i1 %ispos, i32 %sub, i32 %neg
+ %add = add nsw i32 %2, %s.015
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+define i64 @bar(i64* nocapture %a) {
+; CHECK-LABEL: bar
+; CHECK: LV(REG): VF = 2
+; CHECK: LV(REG): Found max usage: 4
+;
+entry:
+ br label %for.body
+
+for.cond.cleanup:
+ %add2.lcssa = phi i64 [ %add2, %for.body ]
+ ret i64 %add2.lcssa
+
+for.body:
+ %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
+ %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
+ %0 = load i64, i64* %arrayidx, align 8
+ %add = add nsw i64 %0, %i.012
+ store i64 %add, i64* %arrayidx, align 8
+ %add2 = add nsw i64 %add, %s.011
+ %inc = add nuw nsw i64 %i.012, 1
+ %exitcond = icmp eq i64 %inc, 1024
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
diff --git a/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll b/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
new file mode 100644
index 000000000000..fe9d59efc8b3
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
@@ -0,0 +1,46 @@
+; RUN: opt -loop-vectorize -vectorizer-maximize-bandwidth -mcpu=corei7-avx -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = global [1000 x i8] zeroinitializer, align 16
+@b = global [1000 x i8] zeroinitializer, align 16
+@c = global [1000 x i8] zeroinitializer, align 16
+@u = global [1000 x i32] zeroinitializer, align 16
+@v = global [1000 x i32] zeroinitializer, align 16
+@w = global [1000 x i32] zeroinitializer, align 16
+
+; Tests that the vectorization factor is determined by the smallest type in the
+; loop, rather than the widest, to maximize bandwidth when
+; -vectorizer-maximize-bandwidth is specified.
+;
+; CHECK-LABEL: foo
+; CHECK: LV: Selecting VF: 32.
+define void @foo() {
+entry:
+ br label %for.body
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds [1000 x i8], [1000 x i8]* @b, i64 0, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1
+ %arrayidx2 = getelementptr inbounds [1000 x i8], [1000 x i8]* @c, i64 0, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1
+ %add = add i8 %1, %0
+ %arrayidx6 = getelementptr inbounds [1000 x i8], [1000 x i8]* @a, i64 0, i64 %indvars.iv
+ store i8 %add, i8* %arrayidx6, align 1
+ %arrayidx8 = getelementptr inbounds [1000 x i32], [1000 x i32]* @v, i64 0, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx8, align 4
+ %arrayidx10 = getelementptr inbounds [1000 x i32], [1000 x i32]* @w, i64 0, i64 %indvars.iv
+ %3 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %3, %2
+ %arrayidx13 = getelementptr inbounds [1000 x i32], [1000 x i32]* @u, i64 0, i64 %indvars.iv
+ store i32 %add11, i32* %arrayidx13, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
diff --git a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
index 6cd3c9c3bc01..cca829b9457e 100644
--- a/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
+++ b/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
@@ -17,7 +17,7 @@ target triple = "x86_64-apple-macosx10.8.0"
; widest vector count.
;
; CHECK: test_consecutive_store
-; CHECK: The Widest type: 64 bits
+; CHECK: The Smallest and Widest types: 64 / 64 bits.
define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 {
%4 = load %0*, %0** %2, align 8
%5 = icmp eq %0** %0, %1
@@ -51,7 +51,7 @@ define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwt
; p[i][y] = (int*) (1 + q[i]);
; }
; CHECK: test_nonconsecutive_store
-; CHECK: The Widest type: 16 bits
+; CHECK: The Smallest and Widest types: 16 / 16 bits.
define void @test_nonconsecutive_store() nounwind ssp uwtable {
br label %1
@@ -93,7 +93,7 @@ define void @test_nonconsecutive_store() nounwind ssp uwtable {
;; Now we check the same rules for loads. We should take consecutive loads of
;; pointer types into account.
; CHECK: test_consecutive_ptr_load
-; CHECK: The Widest type: 64 bits
+; CHECK: The Smallest and Widest types: 8 / 64 bits.
define i8 @test_consecutive_ptr_load() nounwind readonly ssp uwtable {
br label %1
@@ -117,7 +117,7 @@ define i8 @test_consecutive_ptr_load() nounwind readonly ssp uwtable {
;; However, we should not take unconsecutive loads of pointers into account.
; CHECK: test_nonconsecutive_ptr_load
-; CHECK: The Widest type: 16 bits
+; CHECK: LV: The Smallest and Widest types: 16 / 16 bits.
define void @test_nonconsecutive_ptr_load() nounwind ssp uwtable {
br label %1
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 65cabb05f2fb..02fab4447341 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -25,7 +25,7 @@
; File, line, and column should match those specified in the metadata
; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info
-; CHECK: remark: source.cpp:13:5: loop not vectorized: vector width and interleave count are explicitly set to 1
+; CHECK: remark: source.cpp:13:5: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
; CHECK: remark: source.cpp:19:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info
; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization
@@ -45,7 +45,7 @@
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z4testPii(i32* nocapture %A, i32 %Length) #0 {
+define void @_Z4testPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
entry:
%cmp10 = icmp sgt i32 %Length, 0, !dbg !12
br i1 %cmp10, label %for.body, label %for.end, !dbg !12, !llvm.loop !14
@@ -67,7 +67,7 @@ for.end: ; preds = %for.body, %entry
}
; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 {
+define void @_Z13test_disabledPii(i32* nocapture %A, i32 %Length) #0 !dbg !7 {
entry:
%cmp4 = icmp sgt i32 %Length, 0, !dbg !25
br i1 %cmp4, label %for.body, label %for.end, !dbg !25, !llvm.loop !27
@@ -87,7 +87,7 @@ for.end: ; preds = %for.body, %entry
}
; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 {
+define void @_Z17test_array_boundsPiS_i(i32* nocapture %A, i32* nocapture readonly %B, i32 %Length) #0 !dbg !8 {
entry:
%cmp9 = icmp sgt i32 %Length, 0, !dbg !32
br i1 %cmp9, label %for.body.preheader, label %for.end, !dbg !32, !llvm.loop !34
@@ -122,15 +122,15 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "source.cpp", directory: ".")
!2 = !{}
!3 = !{!4, !7, !8}
-!4 = !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32*, i32)* @_Z4testPii, variables: !2)
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "source.cpp", directory: ".")
!6 = !DISubroutineType(types: !2)
-!7 = !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !1, scope: !5, type: !6, function: void (i32*, i32)* @_Z13test_disabledPii, variables: !2)
-!8 = !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !1, scope: !5, type: !6, function: void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, variables: !2)
+!7 = distinct !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !1, scope: !5, type: !6, variables: !2)
+!8 = distinct !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !1, scope: !5, type: !6, variables: !2)
!9 = !{i32 2, !"Dwarf Version", i32 2}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5.0"}
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
new file mode 100644
index 000000000000..df8c668f1262
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
@@ -0,0 +1,113 @@
+; RUN: opt < %s -loop-vectorize -pass-remarks-analysis='loop-vectorize' -mtriple=x86_64-unknown-linux -S 2>&1 | FileCheck %s
+
+; Verify analysis remarks are generated when interleaving is not beneficial.
+; CHECK: remark: vectorization-remarks-profitable.c:5:17: the cost-model indicates that vectorization is not beneficial
+; CHECK: remark: vectorization-remarks-profitable.c:5:17: the cost-model indicates that interleaving is not beneficial and is explicitly disabled or interleave count is set to 1
+; CHECK: remark: vectorization-remarks-profitable.c:12:17: the cost-model indicates that vectorization is not beneficial
+; CHECK: remark: vectorization-remarks-profitable.c:12:17: the cost-model indicates that interleaving is not beneficial
+
+; First loop.
+; #pragma clang loop interleave(disable) unroll(disable)
+; for(int i = 0; i < n; i++) {
+; out[i] = *in[i];
+; }
+
+; Second loop.
+; #pragma clang loop unroll(disable)
+; for(int i = 0; i < n; i++) {
+; out[i] = *in[i];
+; }
+
+; ModuleID = 'vectorization-remarks-profitable.ll'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Function Attrs: nounwind uwtable
+define void @do_not_interleave(float** noalias nocapture readonly %in, float* noalias nocapture %out, i32 %size) #0 !dbg !4 {
+entry:
+ %cmp.4 = icmp eq i32 %size, 0, !dbg !10
+ br i1 %cmp.4, label %for.end, label %for.body.preheader, !dbg !11
+
+for.body.preheader: ; preds = %entry
+ br label %for.body, !dbg !12
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float*, float** %in, i64 %indvars.iv, !dbg !12
+ %0 = bitcast float** %arrayidx to i32**, !dbg !12
+ %1 = load i32*, i32** %0, align 8, !dbg !12
+ %2 = load i32, i32* %1, align 4, !dbg !13
+ %arrayidx2 = getelementptr inbounds float, float* %out, i64 %indvars.iv, !dbg !14
+ %3 = bitcast float* %arrayidx2 to i32*, !dbg !15
+ store i32 %2, i32* %3, align 4, !dbg !15
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !11
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !11
+ %exitcond = icmp eq i32 %lftr.wideiv, %size, !dbg !11
+ br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !11, !llvm.loop !16
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end, !dbg !19
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void, !dbg !19
+}
+
+; Function Attrs: nounwind uwtable
+define void @interleave_not_profitable(float** noalias nocapture readonly %in, float* noalias nocapture %out, i32 %size) #0 !dbg !6 {
+entry:
+ %cmp.4 = icmp eq i32 %size, 0, !dbg !20
+ br i1 %cmp.4, label %for.end, label %for.body, !dbg !21
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float*, float** %in, i64 %indvars.iv, !dbg !22
+ %0 = bitcast float** %arrayidx to i32**, !dbg !22
+ %1 = load i32*, i32** %0, align 8, !dbg !22
+ %2 = load i32, i32* %1, align 4, !dbg !23
+ %arrayidx2 = getelementptr inbounds float, float* %out, i64 %indvars.iv, !dbg !24
+ %3 = bitcast float* %arrayidx2 to i32*, !dbg !25
+ store i32 %2, i32* %3, align 4, !dbg !25
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !21
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21
+ %exitcond = icmp eq i32 %lftr.wideiv, %size, !dbg !21
+ br i1 %exitcond, label %for.end, label %for.body, !dbg !21, !llvm.loop !26
+
+for.end: ; preds = %for.body, %entry
+ ret void, !dbg !27
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 250016)", isOptimized: false, runtimeVersion: 0, emissionKind: 2, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "vectorization-remarks-profitable.c", directory: "")
+!2 = !{}
+!3 = !{!4, !6}
+!4 = distinct !DISubprogram(name: "do_not_interleave", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !2)
+!6 = distinct !DISubprogram(name: "interleave_not_profitable", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.8.0 (trunk 250016)"}
+!10 = !DILocation(line: 4, column: 23, scope: !4)
+!11 = !DILocation(line: 4, column: 3, scope: !4)
+!12 = !DILocation(line: 5, column: 17, scope: !4)
+!13 = !DILocation(line: 5, column: 16, scope: !4)
+!14 = !DILocation(line: 5, column: 7, scope: !4)
+!15 = !DILocation(line: 5, column: 14, scope: !4)
+!16 = distinct !{!16, !17, !18}
+!17 = !{!"llvm.loop.interleave.count", i32 1}
+!18 = !{!"llvm.loop.unroll.disable"}
+!19 = !DILocation(line: 6, column: 1, scope: !4)
+!20 = !DILocation(line: 11, column: 23, scope: !6)
+!21 = !DILocation(line: 11, column: 3, scope: !6)
+!22 = !DILocation(line: 12, column: 17, scope: !6)
+!23 = !DILocation(line: 12, column: 16, scope: !6)
+!24 = !DILocation(line: 12, column: 7, scope: !6)
+!25 = !DILocation(line: 12, column: 14, scope: !6)
+!26 = distinct !{!26, !18}
+!27 = !DILocation(line: 13, column: 1, scope: !6)
+
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index 8640950be32e..77a405ebb434 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -9,13 +9,13 @@
; DEBUG-OUTPUT-NOT: .loc
; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
-; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vector width and interleave count are explicitly set to 1
; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization width: 4, interleaved count: 1)
-; UNROLLED: remark: vectorization-remarks.c:17:8: interleaved by 4 (vectorization not beneficial)
+; UNROLLED: remark: vectorization-remarks.c:17:8: interleaved loop (interleaved count: 4)
+; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-define i32 @foo(i32 %n) #0 {
+define i32 @foo(i32 %n) #0 !dbg !4 {
entry:
%diff = alloca i32, align 4
%cb = alloca [16 x i8], align 16
@@ -52,7 +52,7 @@ declare void @ibar(i32*) #1
!1 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/Transforms/LoopVectorize/conditional-assignment.ll b/test/Transforms/LoopVectorize/conditional-assignment.ll
index f41f08df07a6..8d820e277b26 100644
--- a/test/Transforms/LoopVectorize/conditional-assignment.ll
+++ b/test/Transforms/LoopVectorize/conditional-assignment.ll
@@ -6,7 +6,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
; Function Attrs: nounwind ssp uwtable
-define void @conditional_store(i32* noalias nocapture %indices) #0 {
+define void @conditional_store(i32* noalias nocapture %indices) #0 !dbg !4 {
entry:
br label %for.body, !dbg !10
@@ -36,11 +36,11 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "source.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "conditional_store", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32*)* @conditional_store, variables: !2)
+!4 = distinct !DISubprogram(name: "conditional_store", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "source.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/control-flow.ll b/test/Transforms/LoopVectorize/control-flow.ll
index c56f9122e462..a2fc69a6e907 100644
--- a/test/Transforms/LoopVectorize/control-flow.ll
+++ b/test/Transforms/LoopVectorize/control-flow.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' 2>&1 | FileCheck %s
; C/C++ code for control flow test
; int test(int *A, int Length) {
@@ -20,7 +20,7 @@
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind optsize ssp uwtable
-define i32 @_Z4testPii(i32* nocapture %A, i32 %Length) #0 {
+define i32 @_Z4testPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
entry:
%cmp8 = icmp sgt i32 %Length, 0, !dbg !10
br i1 %cmp8, label %for.body.preheader, label %end, !dbg !10
@@ -55,11 +55,11 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "source.cpp", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, function: i32 (i32*, i32)* @_Z4testPii, variables: !2)
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "source.cpp", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll
index c7440f84b2c9..f68b6865b072 100644
--- a/test/Transforms/LoopVectorize/dbg.value.ll
+++ b/test/Transforms/LoopVectorize/dbg.value.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0"
@C = global [1024 x i32] zeroinitializer, align 16
; CHECK-LABEL: @test(
-define i32 @test() #0 {
+define i32 @test() #0 !dbg !3 {
entry:
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !9, metadata !DIExpression()), !dbg !18
br label %for.body, !dbg !18
@@ -44,16 +44,16 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!26}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang", isOptimized: true, emissionKind: 0, file: !25, enums: !1, retainedTypes: !1, subprograms: !2, globals: !11)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang", isOptimized: true, emissionKind: 0, file: !25, enums: !1, retainedTypes: !1, subprograms: !2, globals: !11)
!1 = !{}
!2 = !{!3}
-!3 = !DISubprogram(name: "test", linkageName: "test", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !25, scope: !4, type: !5, function: i32 ()* @test, variables: !8)
+!3 = distinct !DISubprogram(name: "test", linkageName: "test", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !25, scope: !4, type: !5, variables: !8)
!4 = !DIFile(filename: "test", directory: "/path/to/somewhere")
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !{!9}
-!9 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 6, scope: !10, file: !4, type: !7)
+!9 = !DILocalVariable(name: "i", line: 6, scope: !10, file: !4, type: !7)
!10 = distinct !DILexicalBlock(line: 6, column: 0, file: !25, scope: !3)
!11 = !{!12, !16, !17}
!12 = !DIGlobalVariable(name: "A", line: 1, isLocal: false, isDefinition: true, scope: null, file: !4, type: !13, variable: [1024 x i32]* @A)
diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll
index e691afdd6933..0214f1c4847c 100644
--- a/test/Transforms/LoopVectorize/debugloc.ll
+++ b/test/Transforms/LoopVectorize/debugloc.ll
@@ -12,12 +12,12 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK: load <2 x i32>, <2 x i32>* {{.*}}, !dbg ![[LOC2]]
; CHECK: add <2 x i32> {{.*}}, !dbg ![[LOC2]]
; CHECK: add i64 %index, 2, !dbg ![[LOC]]
-; CHECK: icmp eq i64 %index.next, %end.idx.rnd.down, !dbg ![[LOC]]
+; CHECK: icmp eq i64 %index.next, %n.vec, !dbg ![[LOC]]
; CHECK: middle.block
-; CHECK: add <2 x i32> %rdx.vec.exit.phi, %rdx.shuf, !dbg ![[LOC2]]
+; CHECK: add <2 x i32> %{{.*}}, %rdx.shuf, !dbg ![[LOC2]]
; CHECK: extractelement <2 x i32> %bin.rdx, i32 0, !dbg ![[LOC2]]
-define i32 @f(i32* nocapture %a, i32 %size) #0 {
+define i32 @f(i32* nocapture %a, i32 %size) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata i32* %a, i64 0, metadata !13, metadata !DIExpression()), !dbg !19
tail call void @llvm.dbg.value(metadata i32 %size, i64 0, metadata !14, metadata !DIExpression()), !dbg !19
@@ -63,11 +63,11 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!18, !27}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 185038) (llvm/trunk 185097)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 185038) (llvm/trunk 185097)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "-", directory: "/Volumes/Data/backedup/dev/os/llvm/debug")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !5, scope: !6, type: !7, function: i32 (i32*, i32)* @f, variables: !12)
+!4 = distinct !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !5, scope: !6, type: !7, variables: !12)
!5 = !DIFile(filename: "<stdin>", directory: "/Volumes/Data/backedup/dev/os/llvm/debug")
!6 = !DIFile(filename: "<stdin>", directory: "/Volumes/Data/backedup/dev/os/llvm/debug")
!7 = !DISubroutineType(types: !8)
@@ -76,10 +76,10 @@ attributes #1 = { nounwind readnone }
!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9)
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
!12 = !{!13, !14, !15, !16}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 3, arg: 1, scope: !4, file: !6, type: !10)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "size", line: 3, arg: 2, scope: !4, file: !6, type: !11)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "sum", line: 4, scope: !4, file: !6, type: !11)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 5, scope: !17, file: !6, type: !11)
+!13 = !DILocalVariable(name: "a", line: 3, arg: 1, scope: !4, file: !6, type: !10)
+!14 = !DILocalVariable(name: "size", line: 3, arg: 2, scope: !4, file: !6, type: !11)
+!15 = !DILocalVariable(name: "sum", line: 4, scope: !4, file: !6, type: !11)
+!16 = !DILocalVariable(name: "i", line: 5, scope: !17, file: !6, type: !11)
!17 = distinct !DILexicalBlock(line: 5, column: 0, file: !5, scope: !4)
!18 = !{i32 2, !"Dwarf Version", i32 3}
!19 = !DILocation(line: 3, scope: !4)
diff --git a/test/Transforms/LoopVectorize/gep_with_bitcast.ll b/test/Transforms/LoopVectorize/gep_with_bitcast.ll
new file mode 100644
index 000000000000..ab2fd5e4e1c6
--- /dev/null
+++ b/test/Transforms/LoopVectorize/gep_with_bitcast.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; Vectorization of loop with bitcast between GEP and load
+; Simplified source code:
+;void foo (double** __restrict__ in, bool * __restrict__ res) {
+;
+; for (int i = 0; i < 4096; ++i)
+; res[i] = ((unsigned long long)in[i] == 0);
+;}
+
+; CHECK-LABEL: @foo
+; CHECK: vector.body
+; CHECK: %0 = getelementptr inbounds double*, double** %in, i64 %index
+; CHECK: %1 = bitcast double** %0 to <4 x i64>*
+; CHECK: %wide.load = load <4 x i64>, <4 x i64>* %1, align 8
+; CHECK: %2 = icmp eq <4 x i64> %wide.load, zeroinitializer
+; CHECK: br i1
+
+define void @foo(double** noalias nocapture readonly %in, double** noalias nocapture readnone %out, i8* noalias nocapture %res) #0 {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double*, double** %in, i64 %indvars.iv
+ %tmp53 = bitcast double** %arrayidx to i64*
+ %tmp54 = load i64, i64* %tmp53, align 8
+ %cmp1 = icmp eq i64 %tmp54, 0
+ %arrayidx3 = getelementptr inbounds i8, i8* %res, i64 %indvars.iv
+ %frombool = zext i1 %cmp1 to i8
+ store i8 %frombool, i8* %arrayidx3, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 4096
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+} \ No newline at end of file
diff --git a/test/Transforms/LoopVectorize/if-pred-stores.ll b/test/Transforms/LoopVectorize/if-pred-stores.ll
index 991d027ada5c..0d70f557f834 100644
--- a/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -1,5 +1,8 @@
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -simplifycfg < %s | FileCheck %s --check-prefix=UNROLL
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL-NOSIMPLIFY
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -simplifycfg < %s | FileCheck %s --check-prefix=VEC
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -simplifycfg -instcombine < %s | FileCheck %s --check-prefix=VEC-IC
+
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
@@ -14,27 +17,49 @@ entry:
; VEC: %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true>
; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[v10]], i32 0
; VEC: %[[v12:.+]] = icmp eq i1 %[[v11]], true
+; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0
+; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0
; VEC: br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]]
;
; VEC: [[cond]]:
-; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0
-; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0
; VEC: store i32 %[[v13]], i32* %[[v14]], align 4
; VEC: br label %[[else:.+]]
;
; VEC: [[else]]:
; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[v10]], i32 1
; VEC: %[[v16:.+]] = icmp eq i1 %[[v15]], true
+; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1
+; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1
; VEC: br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]]
;
; VEC: [[cond2]]:
-; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1
-; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1
; VEC: store i32 %[[v17]], i32* %[[v18]], align 4
; VEC: br label %[[else2:.+]]
;
; VEC: [[else2]]:
+; VEC-IC-LABEL: test
+; VEC-IC: %[[v1:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100>
+; VEC-IC: %[[v2:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20>
+; VEC-IC: %[[v3:.+]] = extractelement <2 x i1> %[[v1]], i32 0
+; VEC-IC: br i1 %[[v3]], label %[[cond:.+]], label %[[else:.+]]
+;
+; VEC-IC: [[cond]]:
+; VEC-IC: %[[v4:.+]] = extractelement <2 x i32> %[[v2]], i32 0
+; VEC-IC: store i32 %[[v4]], i32* %{{.*}}, align 4
+; VEC-IC: br label %[[else:.+]]
+;
+; VEC-IC: [[else]]:
+; VEC-IC: %[[v5:.+]] = extractelement <2 x i1> %[[v1]], i32 1
+; VEC-IC: br i1 %[[v5]], label %[[cond2:.+]], label %[[else2:.+]]
+;
+; VEC-IC: [[cond2]]:
+; VEC-IC: %[[v6:.+]] = extractelement <2 x i32> %[[v2]], i32 1
+; VEC-IC: store i32 %[[v6]], i32* %{{.*}}, align 4
+; VEC-IC: br label %[[else2:.+]]
+;
+; VEC-IC: [[else2]]:
+
; UNROLL-LABEL: test
; UNROLL: vector.body:
; UNROLL: %[[IND:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 0
@@ -90,9 +115,9 @@ for.end:
; vectorized loop body.
; PR18724
-; UNROLL-LABEL: bug18724
-; UNROLL: store i32
-; UNROLL: store i32
+; UNROLL-NOSIMPLIFY-LABEL: bug18724
+; UNROLL-NOSIMPLIFY: store i32
+; UNROLL-NOSIMPLIFY: store i32
define void @bug18724() {
entry:
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index 2fbb2de797ae..59ee66a4a35d 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -6,8 +6,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK-LABEL: @multi_int_induction(
; CHECK: vector.body:
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK: %normalized.idx = sub i64 %index, 0
-; CHECK: %[[VAR:.*]] = trunc i64 %normalized.idx to i32
+; CHECK: %[[VAR:.*]] = trunc i64 %index to i32
; CHECK: %offset.idx = add i32 190, %[[VAR]]
define void @multi_int_induction(i32* %A, i32 %N) {
for.body.lr.ph:
@@ -113,12 +112,11 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
; condition and branch directly to the scalar loop.
; CHECK-LABEL: max_i32_backedgetaken
-; CHECK: %backedge.overflow = icmp eq i32 -1, -1
-; CHECK: br i1 %backedge.overflow, label %scalar.ph, label %overflow.checked
+; CHECK: br i1 true, label %scalar.ph, label %min.iters.checked
; CHECK: scalar.ph:
-; CHECK: %bc.resume.val = phi i32 [ %resume.val, %middle.block ], [ 0, %0 ]
-; CHECK: %bc.merge.rdx = phi i32 [ 1, %0 ], [ %5, %middle.block ]
+; CHECK: %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %0 ]
+; CHECK: %bc.merge.rdx = phi i32 [ 1, %0 ], [ 1, %min.iters.checked ], [ %5, %middle.block ]
define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
@@ -142,11 +140,10 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
; CHECK-LABEL: testoverflowcheck
; CHECK: entry
; CHECK: %[[LOAD:.*]] = load i8
-; CHECK: %[[VAL:.*]] = zext i8 %[[LOAD]] to i32
; CHECK: br
; CHECK: scalar.ph
-; CHECK: phi i32 [ %{{.*}}, %middle.block ], [ %[[VAL]], %entry ]
+; CHECK: phi i8 [ %{{.*}}, %middle.block ], [ %[[LOAD]], %entry ]
@e = global i8 1, align 1
@d = common global i32 0, align 4
diff --git a/test/Transforms/LoopVectorize/miniters.ll b/test/Transforms/LoopVectorize/miniters.ll
new file mode 100644
index 000000000000..81cb2d4ca5a1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/miniters.ll
@@ -0,0 +1,45 @@
+; RUN: opt %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s -check-prefix=UNROLL
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = common global [1000 x i32] zeroinitializer, align 16
+@c = common global [1000 x i32] zeroinitializer, align 16
+@a = common global [1000 x i32] zeroinitializer, align 16
+
+; Generate min.iters.check to skip the vector loop and jump to scalar.ph directly when loop iteration number is less than VF * UF.
+; CHECK-LABEL: foo(
+; CHECK: %min.iters.check = icmp ult i64 %N, 4
+; CHECK: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
+; UNROLL-LABEL: foo(
+; UNROLL: %min.iters.check = icmp ult i64 %N, 8
+; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
+
+define void @foo(i64 %N) {
+entry:
+ %cmp.8 = icmp sgt i64 %N, 0
+ br i1 %cmp.8, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %i.09 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* @b, i64 0, i64 %i.09
+ %tmp = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds [1000 x i32], [1000 x i32]* @c, i64 0, i64 %i.09
+ %tmp1 = load i32, i32* %arrayidx1, align 4
+ %add = add nsw i32 %tmp1, %tmp
+ %arrayidx2 = getelementptr inbounds [1000 x i32], [1000 x i32]* @a, i64 0, i64 %i.09
+ store i32 %add, i32* %arrayidx2, align 4
+ %inc = add nuw nsw i64 %i.09, 1
+ %exitcond = icmp eq i64 %inc, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll
index 5a0356fe11a2..19a401213fd5 100644
--- a/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -412,10 +412,10 @@ for.end:
; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
; CHECK-LABEL: @max_red_float(
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @max_red_float(float %max) #0 {
@@ -427,7 +427,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ogt float %0, %max.red.08
+ %cmp3 = fcmp fast ogt float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -438,10 +438,10 @@ for.end:
}
; CHECK-LABEL: @max_red_float_ge(
-; CHECK: fcmp oge <2 x float>
+; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @max_red_float_ge(float %max) #0 {
@@ -453,7 +453,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp oge float %0, %max.red.08
+ %cmp3 = fcmp fast oge float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -464,10 +464,10 @@ for.end:
}
; CHECK-LABEL: @inverted_max_red_float(
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @inverted_max_red_float(float %max) #0 {
@@ -479,7 +479,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp olt float %0, %max.red.08
+ %cmp3 = fcmp fast olt float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -490,10 +490,10 @@ for.end:
}
; CHECK-LABEL: @inverted_max_red_float_le(
-; CHECK: fcmp ole <2 x float>
+; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @inverted_max_red_float_le(float %max) #0 {
@@ -505,7 +505,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ole float %0, %max.red.08
+ %cmp3 = fcmp fast ole float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -516,10 +516,10 @@ for.end:
}
; CHECK-LABEL: @unordered_max_red_float(
-; CHECK: fcmp ole <2 x float>
+; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @unordered_max_red_float(float %max) #0 {
@@ -531,7 +531,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ugt float %0, %max.red.08
+ %cmp3 = fcmp fast ugt float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -542,10 +542,10 @@ for.end:
}
; CHECK-LABEL: @unordered_max_red_float_ge(
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @unordered_max_red_float_ge(float %max) #0 {
@@ -557,7 +557,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp uge float %0, %max.red.08
+ %cmp3 = fcmp fast uge float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -568,10 +568,10 @@ for.end:
}
; CHECK-LABEL: @inverted_unordered_max_red_float(
-; CHECK: fcmp oge <2 x float>
+; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @inverted_unordered_max_red_float(float %max) #0 {
@@ -583,7 +583,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ult float %0, %max.red.08
+ %cmp3 = fcmp fast ult float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -594,10 +594,10 @@ for.end:
}
; CHECK-LABEL: @inverted_unordered_max_red_float_le(
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select i1
define float @inverted_unordered_max_red_float_le(float %max) #0 {
@@ -609,7 +609,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ule float %0, %max.red.08
+ %cmp3 = fcmp fast ule float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -623,10 +623,10 @@ for.end:
; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
; CHECK-LABEL: @min_red_float(
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @min_red_float(float %min) #0 {
@@ -638,7 +638,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp olt float %0, %min.red.08
+ %cmp3 = fcmp fast olt float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -649,10 +649,10 @@ for.end:
}
; CHECK-LABEL: @min_red_float_le(
-; CHECK: fcmp ole <2 x float>
+; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @min_red_float_le(float %min) #0 {
@@ -664,7 +664,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ole float %0, %min.red.08
+ %cmp3 = fcmp fast ole float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -675,10 +675,10 @@ for.end:
}
; CHECK-LABEL: @inverted_min_red_float(
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @inverted_min_red_float(float %min) #0 {
@@ -690,7 +690,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ogt float %0, %min.red.08
+ %cmp3 = fcmp fast ogt float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -701,10 +701,10 @@ for.end:
}
; CHECK-LABEL: @inverted_min_red_float_ge(
-; CHECK: fcmp oge <2 x float>
+; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @inverted_min_red_float_ge(float %min) #0 {
@@ -716,7 +716,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp oge float %0, %min.red.08
+ %cmp3 = fcmp fast oge float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -727,10 +727,10 @@ for.end:
}
; CHECK-LABEL: @unordered_min_red_float(
-; CHECK: fcmp oge <2 x float>
+; CHECK: fcmp fast oge <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @unordered_min_red_float(float %min) #0 {
@@ -742,7 +742,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ult float %0, %min.red.08
+ %cmp3 = fcmp fast ult float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -753,10 +753,10 @@ for.end:
}
; CHECK-LABEL: @unordered_min_red_float_le(
-; CHECK: fcmp ogt <2 x float>
+; CHECK: fcmp fast ogt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @unordered_min_red_float_le(float %min) #0 {
@@ -768,7 +768,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ule float %0, %min.red.08
+ %cmp3 = fcmp fast ule float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -779,10 +779,10 @@ for.end:
}
; CHECK-LABEL: @inverted_unordered_min_red_float(
-; CHECK: fcmp ole <2 x float>
+; CHECK: fcmp fast ole <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @inverted_unordered_min_red_float(float %min) #0 {
@@ -794,7 +794,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ugt float %0, %min.red.08
+ %cmp3 = fcmp fast ugt float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -805,10 +805,10 @@ for.end:
}
; CHECK-LABEL: @inverted_unordered_min_red_float_ge(
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x float>
+; CHECK: fcmp fast olt <2 x float>
; CHECK: select i1
define float @inverted_unordered_min_red_float_ge(float %min) #0 {
@@ -820,7 +820,7 @@ for.body:
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp uge float %0, %min.red.08
+ %cmp3 = fcmp fast uge float %0, %min.red.08
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -832,10 +832,10 @@ for.end:
; Make sure we handle doubles, too.
; CHECK-LABEL: @min_red_double(
-; CHECK: fcmp olt <2 x double>
+; CHECK: fcmp fast olt <2 x double>
; CHECK: select <2 x i1>
; CHECK: middle.block
-; CHECK: fcmp olt <2 x double>
+; CHECK: fcmp fast olt <2 x double>
; CHECK: select i1
define double @min_red_double(double %min) #0 {
@@ -847,7 +847,7 @@ for.body:
%min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x double], [1024 x double]* @dA, i64 0, i64 %indvars.iv
%0 = load double, double* %arrayidx, align 4
- %cmp3 = fcmp olt double %0, %min.red.08
+ %cmp3 = fcmp fast olt double %0, %min.red.08
%min.red.0 = select i1 %cmp3, double %0, double %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
@@ -871,7 +871,7 @@ for.body:
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
%arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
%0 = load float, float* %arrayidx, align 4
- %cmp3 = fcmp ogt float %0, %max.red.08
+ %cmp3 = fcmp fast ogt float %0, %max.red.08
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
diff --git a/test/Transforms/LoopVectorize/no_array_bounds.ll b/test/Transforms/LoopVectorize/no_array_bounds.ll
index f7c7ff7732b9..13cec71fc455 100644
--- a/test/Transforms/LoopVectorize/no_array_bounds.ll
+++ b/test/Transforms/LoopVectorize/no_array_bounds.ll
@@ -17,7 +17,7 @@
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind ssp uwtable
-define void @_Z4testPiS_i(i32* nocapture %A, i32* nocapture %B, i32 %number) #0 {
+define void @_Z4testPiS_i(i32* nocapture %A, i32* nocapture %B, i32 %number) #0 !dbg !4 {
entry:
%cmp25 = icmp sgt i32 %number, 0, !dbg !10
br i1 %cmp25, label %for.body.preheader, label %for.end15, !dbg !10, !llvm.loop !12
@@ -72,11 +72,11 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "no_array_bounds.cpp", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, function: void (i32*, i32*, i32)* @_Z4testPiS_i, variables: !2)
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "no_array_bounds.cpp", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll
index 7030b6b4df2d..2683b42dc717 100644
--- a/test/Transforms/LoopVectorize/no_outside_user.ll
+++ b/test/Transforms/LoopVectorize/no_outside_user.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s 2>&1 | FileCheck %s
; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable
; CHECK: remark: {{.*}}: loop not vectorized: use of induction value outside of the loop is not handled by vectorizer
diff --git a/test/Transforms/LoopVectorize/no_switch.ll b/test/Transforms/LoopVectorize/no_switch.ll
index 1f139c26d790..842d262d3192 100644
--- a/test/Transforms/LoopVectorize/no_switch.ll
+++ b/test/Transforms/LoopVectorize/no_switch.ll
@@ -1,9 +1,17 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -S 2>&1 | FileCheck %s -check-prefix=NOANALYSIS
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -pass-remarks-missed='loop-vectorize' -S 2>&1 | FileCheck %s -check-prefix=MOREINFO
; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
-; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info (Force=true, Vector Width=4)
; CHECK: warning: source.cpp:4:5: loop not vectorized: failed explicitly specified loop vectorization
+; NOANALYSIS-NOT: remark: {{.*}}
+; NOANALYSIS: warning: source.cpp:4:5: loop not interleaved: failed explicitly specified loop interleaving
+
+; MOREINFO: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
+; MOREINFO: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info (Force=true, Vector Width=4)
+; MOREINFO: warning: source.cpp:4:5: loop not vectorized: failed explicitly specified loop vectorization
+
; CHECK: _Z11test_switchPii
; CHECK-NOT: x i32>
; CHECK: ret
@@ -11,7 +19,7 @@
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Function Attrs: nounwind optsize ssp uwtable
-define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 {
+define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
entry:
%cmp18 = icmp sgt i32 %Length, 0, !dbg !10
br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12
@@ -59,11 +67,11 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "source.cpp", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "test_switch", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: void (i32*, i32)* @_Z11test_switchPii, variables: !2)
+!4 = distinct !DISubprogram(name: "test_switch", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "source.cpp", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/nontemporal.ll b/test/Transforms/LoopVectorize/nontemporal.ll
new file mode 100644
index 000000000000..106b19031228
--- /dev/null
+++ b/test/Transforms/LoopVectorize/nontemporal.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+; CHECK-LABEL: @foo(
+define void @foo(float* noalias %a, float* noalias %b, float* noalias %c, i32 %N) {
+entry:
+ %cmp.4 = icmp sgt i32 %N, 0
+ br i1 %cmp.4, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+
+; Check that we don't lose the !nontemporal hint when vectorizing loads.
+; CHECK: %wide.load{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4, !nontemporal !0
+ %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4, !nontemporal !0
+
+; Check that we don't introduce a !nontemporal hint when the original scalar loads didn't have it.
+; CHECK: %wide.load{{[0-9]+}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4{{$}}
+ %arrayidx2 = getelementptr inbounds float, float* %c, i64 %indvars.iv
+ %1 = load float, float* %arrayidx2, align 4
+ %add = fadd float %0, %1
+
+; Check that we don't lose the !nontemporal hint when vectorizing stores.
+; CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 4, !nontemporal !0
+ %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvars.iv
+ store float %add, float* %arrayidx4, align 4, !nontemporal !0
+
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+; CHECK: ret void
+ ret void
+}
+
+!0 = !{i32 1}
diff --git a/test/Transforms/LoopVectorize/optsize.ll b/test/Transforms/LoopVectorize/optsize.ll
index e183fda099a2..513657cd3723 100644
--- a/test/Transforms/LoopVectorize/optsize.ll
+++ b/test/Transforms/LoopVectorize/optsize.ll
@@ -1,18 +1,17 @@
; This test verifies that the loop vectorizer will NOT produce a tail
-; loop with Optimize for size attibute.
+; loop with the optimize for size or the minimize size attributes.
; REQUIRES: asserts
-; RUN: opt < %s -loop-vectorize -Os -debug -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
-
-;CHECK-NOT: <2 x i8>
-;CHECK-NOT: <4 x i8>
-;CHECK: Aborting. A tail loop is required in Os.
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
@tab = common global [32 x i8] zeroinitializer, align 1
-; Function Attrs: nounwind optsize
-define i32 @foo() #0 {
+define i32 @foo_optsize() #0 {
+; CHECK-LABEL: @foo_optsize(
+; CHECK-NOT: <2 x i8>
+; CHECK-NOT: <4 x i8>
+
entry:
br label %for.body
@@ -31,4 +30,30 @@ for.end: ; preds = %for.body
ret i32 0
}
-attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { optsize }
+
+define i32 @foo_minsize() #1 {
+; CHECK-LABEL: @foo_minsize(
+; CHECK-NOT: <2 x i8>
+; CHECK-NOT: <4 x i8>
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+ %0 = load i8, i8* %arrayidx, align 1
+ %cmp1 = icmp eq i8 %0, 0
+ %. = select i1 %cmp1, i8 2, i8 1
+ store i8 %., i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %i.08, 202
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 0
+}
+
+attributes #1 = { minsize }
+
diff --git a/test/Transforms/LoopVectorize/ptr-induction.ll b/test/Transforms/LoopVectorize/ptr-induction.ll
new file mode 100644
index 000000000000..47d33352763d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ptr-induction.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; This testcase causes SCEV to return a pointer-typed exit value.
+
+; CHECK: @f
+; Expect that the pointer indvar has been converted into an integer indvar.
+; CHECK: %index.next = add i64 %index, 4
+define i32 @f(i32* readonly %a, i32* readnone %b) #0 {
+entry:
+ %cmp.6 = icmp ult i32* %a, %b
+ br i1 %cmp.6, label %while.body.preheader, label %while.end
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %a.pn = phi i32* [ %incdec.ptr8, %while.body ], [ %a, %while.body.preheader ]
+ %acc.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
+ %incdec.ptr8 = getelementptr inbounds i32, i32* %a.pn, i64 1
+ %0 = load i32, i32* %incdec.ptr8, align 1
+ %add = add nuw nsw i32 %0, %acc.07
+ %exitcond = icmp eq i32* %incdec.ptr8, %b
+ br i1 %exitcond, label %while.cond.while.end_crit_edge, label %while.body
+
+while.cond.while.end_crit_edge: ; preds = %while.body
+ %add.lcssa = phi i32 [ %add, %while.body ]
+ br label %while.end
+
+while.end: ; preds = %while.cond.while.end_crit_edge, %entry
+ %acc.0.lcssa = phi i32 [ %add.lcssa, %while.cond.while.end_crit_edge ], [ 0, %entry ]
+ ret i32 %acc.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index 647e58a7e41f..63b138f1d560 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -175,8 +175,8 @@ for.end: ; preds = %for.body, %entry
}
;CHECK-LABEL: @reduction_and(
-;CHECK: and <4 x i32>
;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1>
+;CHECK: and <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: and <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll
index 6b63a0d8db6c..88dd2e4d66ca 100644
--- a/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -96,8 +96,7 @@ loopend:
; CHECK-LABEL: @reverse_forward_induction_i64_i8(
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK: %normalized.idx = sub i64 %index, 0
-; CHECK: %offset.idx = sub i64 1023, %normalized.idx
+; CHECK: %offset.idx = sub i64 1023, %index
; CHECK: trunc i64 %index to i8
define void @reverse_forward_induction_i64_i8() {
@@ -122,10 +121,8 @@ while.end:
; CHECK-LABEL: @reverse_forward_induction_i64_i8_signed(
; CHECK: vector.body:
-; CHECK: %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK: %normalized.idx = sub i64 %index, 129
-; CHECK: %offset.idx = sub i64 1023, %normalized.idx
-; CHECK: trunc i64 %index to i8
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i64 1023, %index
define void @reverse_forward_induction_i64_i8_signed() {
entry:
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index 1f07d3f69594..3673b71db30d 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -11,9 +11,9 @@ target triple = "x86_64-apple-macosx10.9.0"
;CHECK-LABEL: define i32 @foo
;CHECK: for.body.preheader:
-;CHECK: br i1 %cmp.zero, label %middle.block, label %vector.memcheck, !dbg [[BODY_LOC:![0-9]+]]
+;CHECK: br i1 %cmp.zero, label %scalar.ph, label %vector.memcheck, !dbg [[BODY_LOC:![0-9]+]]
;CHECK: vector.memcheck:
-;CHECK: br i1 %memcheck.conflict, label %middle.block, label %vector.ph, !dbg [[BODY_LOC]]
+;CHECK: br i1 %memcheck.conflict, label %scalar.ph, label %vector.ph, !dbg [[BODY_LOC]]
;CHECK: load <4 x float>
define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp {
entry:
@@ -73,7 +73,7 @@ loopexit:
!2 = !{}
!3 = !DISubroutineType(types: !2)
!4 = !DIFile(filename: "test.cpp", directory: "/tmp")
-!5 = !DISubprogram(name: "foo", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = distinct !DISubprogram(name: "foo", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
!6 = !DILocation(line: 100, column: 1, scope: !5)
!7 = !DILocation(line: 101, column: 1, scope: !5)
!8 = !DILocation(line: 102, column: 1, scope: !5)
diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll
index 6bc71e160ccd..a7f692cef170 100644
--- a/test/Transforms/LoopVectorize/runtime-limit.ll
+++ b/test/Transforms/LoopVectorize/runtime-limit.ll
@@ -1,12 +1,25 @@
-; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s -check-prefix=OVERRIDE
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -pragma-vectorize-memory-check-threshold=6 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.8.0"
+
+; First loop produces a diagnostic pass remark.
+;CHECK: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 1)
+; Second loop produces diagnostic analysis remark.
+;CHECK: remark: {{.*}}:0:0: loop not vectorized: cannot prove it is safe to reorder memory operations
+
+; First loop produces a diagnostic pass remark.
+;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 1)
+; Second loop produces diagnostic pass remark.
+;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 1)
; We are vectorizing with 6 runtime checks.
;CHECK-LABEL: func1x6(
;CHECK: <4 x i32>
;CHECK: ret
+;OVERRIDE-LABEL: func1x6(
+;OVERRIDE: <4 x i32>
+;OVERRIDE: ret
define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
entry:
br label %for.body
@@ -41,6 +54,10 @@ for.end: ; preds = %for.body
;CHECK-LABEL: func2x6(
;CHECK-NOT: <4 x i32>
;CHECK: ret
+; We vectorize with 12 checks if a vectorization hint is provided.
+;OVERRIDE-LABEL: func2x6(
+;OVERRIDE: <4 x i32>
+;OVERRIDE: ret
define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
entry:
br label %for.body
diff --git a/test/Transforms/LowerBitSets/function-ext.ll b/test/Transforms/LowerBitSets/function-ext.ll
new file mode 100644
index 000000000000..2a83bef2f074
--- /dev/null
+++ b/test/Transforms/LowerBitSets/function-ext.ll
@@ -0,0 +1,22 @@
+; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+
+; Tests that we correctly handle external references, including the case where
+; all functions in a bitset are external references.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @foo()
+
+; CHECK: @[[JT:.*]] = private constant [1 x <{ i8, i32, i8, i8, i8 }>] [<{ i8, i32, i8, i8, i8 }> <{ i8 -23, i32 trunc (i64 sub (i64 sub (i64 ptrtoint (void ()* @foo to i64), i64 ptrtoint ([1 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)), i64 5) to i32), i8 -52, i8 -52, i8 -52 }>], section ".text"
+
+define i1 @bar(i8* %ptr) {
+ ; CHECK: icmp eq i64 {{.*}}, ptrtoint ([1 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)
+ %p = call i1 @llvm.bitset.test(i8* %ptr, metadata !"void")
+ ret i1 %p
+}
+
+declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+!0 = !{!"void", void ()* @foo, i64 0}
+
+!llvm.bitsets = !{!0}
diff --git a/test/Transforms/LowerBitSets/function.ll b/test/Transforms/LowerBitSets/function.ll
new file mode 100644
index 000000000000..bf4043d61c41
--- /dev/null
+++ b/test/Transforms/LowerBitSets/function.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+
+; Tests that we correctly create a jump table for bitsets containing 2 or more
+; functions.
+
+target triple = "x86_64-unknown-linux-gnu"
+target datalayout = "e-p:64:64"
+
+; CHECK: @[[JT:.*]] = private constant [2 x <{ i8, i32, i8, i8, i8 }>] [<{ i8, i32, i8, i8, i8 }> <{ i8 -23, i32 trunc (i64 sub (i64 sub (i64 ptrtoint (void ()* @[[FNAME:.*]] to i64), i64 ptrtoint ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)), i64 5) to i32), i8 -52, i8 -52, i8 -52 }>, <{ i8, i32, i8, i8, i8 }> <{ i8 -23, i32 trunc (i64 sub (i64 sub (i64 ptrtoint (void ()* @[[GNAME:.*]] to i64), i64 ptrtoint ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)), i64 13) to i32), i8 -52, i8 -52, i8 -52 }>], section ".text"
+
+; CHECK: @f = alias void (), bitcast ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to void ()*)
+; CHECK: @g = alias void (), bitcast (<{ i8, i32, i8, i8, i8 }>* getelementptr inbounds ([2 x <{ i8, i32, i8, i8, i8 }>], [2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]], i64 0, i64 1) to void ()*)
+
+; CHECK: define private void @[[FNAME]]() {
+define void @f() {
+ ret void
+}
+
+; CHECK: define private void @[[GNAME]]() {
+define void @g() {
+ ret void
+}
+
+!0 = !{!"bitset1", void ()* @f, i32 0}
+!1 = !{!"bitset1", void ()* @g, i32 0}
+
+!llvm.bitsets = !{ !0, !1 }
+
+declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define i1 @foo(i8* %p) {
+ ; CHECK: sub i64 {{.*}}, ptrtoint ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)
+ %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset1")
+ ret i1 %x
+}
diff --git a/test/Transforms/LowerBitSets/nonstring.ll b/test/Transforms/LowerBitSets/nonstring.ll
new file mode 100644
index 000000000000..e61c9123e086
--- /dev/null
+++ b/test/Transforms/LowerBitSets/nonstring.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+
+; Tests that non-string metadata nodes may be used as bitset identifiers.
+
+target datalayout = "e-p:32:32"
+
+; CHECK: @[[ANAME:.*]] = private constant { i32 }
+; CHECK: @[[BNAME:.*]] = private constant { [2 x i32] }
+
+@a = constant i32 1
+@b = constant [2 x i32] [i32 2, i32 3]
+
+!0 = !{!2, i32* @a, i32 0}
+!1 = !{!3, [2 x i32]* @b, i32 0}
+!2 = distinct !{}
+!3 = distinct !{}
+
+!llvm.bitsets = !{ !0, !1 }
+
+declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK-LABEL: @foo
+define i1 @foo(i8* %p) {
+ ; CHECK: icmp eq i32 {{.*}}, ptrtoint ({ i32 }* @[[ANAME]] to i32)
+ %x = call i1 @llvm.bitset.test(i8* %p, metadata !2)
+ ret i1 %x
+}
+
+; CHECK-LABEL: @bar
+define i1 @bar(i8* %p) {
+ ; CHECK: icmp eq i32 {{.*}}, ptrtoint ({ [2 x i32] }* @[[BNAME]] to i32)
+ %x = call i1 @llvm.bitset.test(i8* %p, metadata !3)
+ ret i1 %x
+}
diff --git a/test/Transforms/LowerBitSets/pr25902.ll b/test/Transforms/LowerBitSets/pr25902.ll
new file mode 100644
index 000000000000..b9a1203ec0e1
--- /dev/null
+++ b/test/Transforms/LowerBitSets/pr25902.ll
@@ -0,0 +1,21 @@
+; PR25902: gold plugin crash.
+; RUN: opt -mtriple=i686-pc -S -lowerbitsets < %s
+
+define void @f(void ()* %p) {
+entry:
+ %a = bitcast void ()* %p to i8*, !nosanitize !1
+ %b = call i1 @llvm.bitset.test(i8* %a, metadata !"_ZTSFvvE"), !nosanitize !1
+ ret void
+}
+
+define void @g() {
+entry:
+ ret void
+}
+
+declare i1 @llvm.bitset.test(i8*, metadata)
+
+!llvm.bitsets = !{!0}
+
+!0 = !{!"_ZTSFvvE", void ()* @g, i64 0}
+!1 = !{}
diff --git a/test/Transforms/LowerBitSets/simple.ll b/test/Transforms/LowerBitSets/simple.ll
index 0fcdf0b36d63..a22d998e2008 100644
--- a/test/Transforms/LowerBitSets/simple.ll
+++ b/test/Transforms/LowerBitSets/simple.ll
@@ -6,8 +6,8 @@ target datalayout = "e-p:32:32"
; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] } { i32 1, [0 x i8] zeroinitializer, [63 x i32] zeroinitializer, [4 x i8] zeroinitializer, i32 3, [0 x i8] zeroinitializer, [2 x i32] [i32 4, i32 5] }
@a = constant i32 1
-@b = constant [63 x i32] zeroinitializer
-@c = constant i32 3
+@b = hidden constant [63 x i32] zeroinitializer
+@c = protected constant i32 3
@d = constant [2 x i32] [i32 4, i32 5]
; CHECK: [[BA:@[^ ]*]] = private constant [68 x i8] c"\03\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\02\00\01"
@@ -26,26 +26,26 @@ target datalayout = "e-p:32:32"
!4 = !{!"bitset2", i32* @c, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset2", i32* @c, i32 0}
+; Entries whose second operand is null (the result of a global being DCE'd)
+; should be ignored.
+!5 = !{!"bitset2", null, i32 0}
+
; Offset 0, 4 byte alignment
-!5 = !{!"bitset3", i32* @a, i32 0}
+!6 = !{!"bitset3", i32* @a, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @a, i32 0}
-!6 = !{!"bitset3", i32* @c, i32 0}
+!7 = !{!"bitset3", i32* @c, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @c, i32 0}
-; Entries whose second operand is null (the result of a global being DCE'd)
-; should be ignored.
-!7 = !{!"bitset2", null, i32 0}
-
!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6, !7 }
-; CHECK: @bits_use{{[0-9]*}} = private alias i8* @bits{{[0-9]*}}
-; CHECK: @bits_use.{{[0-9]*}} = private alias i8* @bits{{[0-9]*}}
-; CHECK: @bits_use.{{[0-9]*}} = private alias i8* @bits{{[0-9]*}}
+; CHECK: @bits_use{{[0-9]*}} = private alias i8, i8* @bits{{[0-9]*}}
+; CHECK: @bits_use.{{[0-9]*}} = private alias i8, i8* @bits{{[0-9]*}}
+; CHECK: @bits_use.{{[0-9]*}} = private alias i8, i8* @bits{{[0-9]*}}
-; CHECK: @a = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 0)
-; CHECK: @b = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
-; CHECK: @c = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
-; CHECK: @d = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
+; CHECK: @a = alias i32, getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 0)
+; CHECK: @b = hidden alias [63 x i32], getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
+; CHECK: @c = protected alias i32, getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
+; CHECK: @d = alias [2 x i32], getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
; CHECK-DARWIN: @aptr = constant i32* getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }, { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G:@[^ ]*]], i32 0, i32 0)
@aptr = constant i32* @a
@@ -61,8 +61,8 @@ target datalayout = "e-p:32:32"
; CHECK-DARWIN: [[G]] = private constant
-; CHECK: @bits{{[0-9]*}} = private alias getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
-; CHECK: @bits.{{[0-9]*}} = private alias getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
+; CHECK: @bits{{[0-9]*}} = private alias i8, getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
+; CHECK: @bits.{{[0-9]*}} = private alias i8, getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
diff --git a/test/Transforms/LowerExpectIntrinsic/basic.ll b/test/Transforms/LowerExpectIntrinsic/basic.ll
index 73d9f44ee7e5..69e67cd7c1dd 100644
--- a/test/Transforms/LowerExpectIntrinsic/basic.ll
+++ b/test/Transforms/LowerExpectIntrinsic/basic.ll
@@ -1,5 +1,5 @@
; RUN: opt -lower-expect -strip-dead-prototypes -S -o - < %s | FileCheck %s
-; RUN: opt -S -passes=lower-expect < %s | opt -strip-dead-prototypes -S | FileCheck %s
+; RUN: opt -S -passes='function(lower-expect),strip-dead-prototypes' < %s | FileCheck %s
; CHECK-LABEL: @test1(
define i32 @test1(i32 %x) nounwind uwtable ssp {
diff --git a/test/Transforms/LowerSwitch/delete-default-block-crash.ll b/test/Transforms/LowerSwitch/delete-default-block-crash.ll
new file mode 100644
index 000000000000..23588d56c335
--- /dev/null
+++ b/test/Transforms/LowerSwitch/delete-default-block-crash.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -lowerswitch -disable-output
+
+; This test verifies that -lowerswitch does not crash after deleting the default block.
+
+declare i32 @f(i32)
+
+define i32 @unreachable(i32 %x) {
+
+entry:
+ switch i32 %x, label %unreachable [
+ i32 5, label %a
+ i32 6, label %a
+ i32 7, label %a
+ i32 10, label %b
+ i32 20, label %b
+ i32 30, label %b
+ i32 40, label %b
+ ]
+unreachable:
+ unreachable
+a:
+ %0 = call i32 @f(i32 0)
+ ret i32 %0
+b:
+ %1 = call i32 @f(i32 1)
+ ret i32 %1
+}
diff --git a/test/Transforms/LowerSwitch/feature.ll b/test/Transforms/LowerSwitch/feature.ll
index b82d93455436..09d25f0b06d4 100644
--- a/test/Transforms/LowerSwitch/feature.ll
+++ b/test/Transforms/LowerSwitch/feature.ll
@@ -4,49 +4,49 @@
; On output we should got binary comparison tree. Check that all is fine.
;CHECK: entry:
-;CHECK-NEXT: br label %NodeBlock.19
+;CHECK-NEXT: br label %NodeBlock19
-;CHECK: NodeBlock.19: ; preds = %entry
-;CHECK-NEXT: %Pivot.20 = icmp slt i32 %tmp158, 10
-;CHECK-NEXT: br i1 %Pivot.20, label %NodeBlock.5, label %NodeBlock.17
+;CHECK: NodeBlock19: ; preds = %entry
+;CHECK-NEXT: %Pivot20 = icmp slt i32 %tmp158, 10
+;CHECK-NEXT: br i1 %Pivot20, label %NodeBlock5, label %NodeBlock17
-;CHECK: NodeBlock.17: ; preds = %NodeBlock.19
-;CHECK-NEXT: %Pivot.18 = icmp slt i32 %tmp158, 13
-;CHECK-NEXT: br i1 %Pivot.18, label %NodeBlock.9, label %NodeBlock.15
+;CHECK: NodeBlock17: ; preds = %NodeBlock19
+;CHECK-NEXT: %Pivot18 = icmp slt i32 %tmp158, 13
+;CHECK-NEXT: br i1 %Pivot18, label %NodeBlock9, label %NodeBlock15
-;CHECK: NodeBlock.15: ; preds = %NodeBlock.17
-;CHECK-NEXT: %Pivot.16 = icmp slt i32 %tmp158, 14
-;CHECK-NEXT: br i1 %Pivot.16, label %bb330, label %NodeBlock.13
+;CHECK: NodeBlock15: ; preds = %NodeBlock17
+;CHECK-NEXT: %Pivot16 = icmp slt i32 %tmp158, 14
+;CHECK-NEXT: br i1 %Pivot16, label %bb330, label %NodeBlock13
-;CHECK: NodeBlock.13: ; preds = %NodeBlock.15
-;CHECK-NEXT: %Pivot.14 = icmp slt i32 %tmp158, 15
-;CHECK-NEXT: br i1 %Pivot.14, label %bb332, label %LeafBlock.11
+;CHECK: NodeBlock13: ; preds = %NodeBlock15
+;CHECK-NEXT: %Pivot14 = icmp slt i32 %tmp158, 15
+;CHECK-NEXT: br i1 %Pivot14, label %bb332, label %LeafBlock11
-;CHECK: LeafBlock.11: ; preds = %NodeBlock.13
+;CHECK: LeafBlock11: ; preds = %NodeBlock13
;CHECK-NEXT: %SwitchLeaf12 = icmp eq i32 %tmp158, 15
;CHECK-NEXT: br i1 %SwitchLeaf12, label %bb334, label %NewDefault
-;CHECK: NodeBlock.9: ; preds = %NodeBlock.17
-;CHECK-NEXT: %Pivot.10 = icmp slt i32 %tmp158, 11
-;CHECK-NEXT: br i1 %Pivot.10, label %bb324, label %NodeBlock.7
+;CHECK: NodeBlock9: ; preds = %NodeBlock17
+;CHECK-NEXT: %Pivot10 = icmp slt i32 %tmp158, 11
+;CHECK-NEXT: br i1 %Pivot10, label %bb324, label %NodeBlock7
-;CHECK: NodeBlock.7: ; preds = %NodeBlock.9
-;CHECK-NEXT: %Pivot.8 = icmp slt i32 %tmp158, 12
-;CHECK-NEXT: br i1 %Pivot.8, label %bb326, label %bb328
+;CHECK: NodeBlock7: ; preds = %NodeBlock9
+;CHECK-NEXT: %Pivot8 = icmp slt i32 %tmp158, 12
+;CHECK-NEXT: br i1 %Pivot8, label %bb326, label %bb328
-;CHECK: NodeBlock.5: ; preds = %NodeBlock.19
-;CHECK-NEXT: %Pivot.6 = icmp slt i32 %tmp158, 7
-;CHECK-NEXT: br i1 %Pivot.6, label %NodeBlock, label %NodeBlock.3
+;CHECK: NodeBlock5: ; preds = %NodeBlock19
+;CHECK-NEXT: %Pivot6 = icmp slt i32 %tmp158, 7
+;CHECK-NEXT: br i1 %Pivot6, label %NodeBlock, label %NodeBlock3
-;CHECK: NodeBlock.3: ; preds = %NodeBlock.5
-;CHECK-NEXT: %Pivot.4 = icmp slt i32 %tmp158, 8
-;CHECK-NEXT: br i1 %Pivot.4, label %bb, label %NodeBlock.1
+;CHECK: NodeBlock3: ; preds = %NodeBlock5
+;CHECK-NEXT: %Pivot4 = icmp slt i32 %tmp158, 8
+;CHECK-NEXT: br i1 %Pivot4, label %bb, label %NodeBlock1
-;CHECK: NodeBlock.1: ; preds = %NodeBlock.3
-;CHECK-NEXT: %Pivot.2 = icmp slt i32 %tmp158, 9
-;CHECK-NEXT: br i1 %Pivot.2, label %bb338, label %bb322
+;CHECK: NodeBlock1: ; preds = %NodeBlock3
+;CHECK-NEXT: %Pivot2 = icmp slt i32 %tmp158, 9
+;CHECK-NEXT: br i1 %Pivot2, label %bb338, label %bb322
-;CHECK: NodeBlock: ; preds = %NodeBlock.5
+;CHECK: NodeBlock: ; preds = %NodeBlock5
;CHECK-NEXT: %Pivot = icmp slt i32 %tmp158, 0
;CHECK-NEXT: br i1 %Pivot, label %LeafBlock, label %bb338
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index 8a2eedd96baf..6aaf594b3056 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -mem2reg -S | FileCheck %s
-define double @testfunc(i32 %i, double %j) nounwind ssp {
+define double @testfunc(i32 %i, double %j) nounwind ssp !dbg !1 {
entry:
%i_addr = alloca i32 ; <i32*> [#uses=2]
%j_addr = alloca double ; <double*> [#uses=2]
@@ -10,8 +10,8 @@ entry:
call void @llvm.dbg.declare(metadata i32* %i_addr, metadata !0, metadata !DIExpression()), !dbg !8
; CHECK: call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata ![[IVAR:[0-9]*]], metadata {{.*}})
; CHECK: call void @llvm.dbg.value(metadata double %j, i64 0, metadata ![[JVAR:[0-9]*]], metadata {{.*}})
-; CHECK: ![[IVAR]] = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i"
-; CHECK: ![[JVAR]] = !DILocalVariable(tag: DW_TAG_arg_variable, name: "j"
+; CHECK: ![[IVAR]] = !DILocalVariable(name: "i"
+; CHECK: ![[JVAR]] = !DILocalVariable(name: "j"
store i32 %i, i32* %i_addr
call void @llvm.dbg.declare(metadata double* %j_addr, metadata !9, metadata !DIExpression()), !dbg !8
store double %j, double* %j_addr
@@ -35,16 +35,16 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!14}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 2, arg: 0, scope: !1, file: !2, type: !7)
-!1 = !DISubprogram(name: "testfunc", linkageName: "testfunc", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !12, scope: !2, type: !4, function: double (i32, double)* @testfunc)
+!0 = !DILocalVariable(name: "i", line: 2, arg: 1, scope: !1, file: !2, type: !7)
+!1 = distinct !DISubprogram(name: "testfunc", linkageName: "testfunc", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !12, scope: !2, type: !4)
!2 = !DIFile(filename: "testfunc.c", directory: "/tmp")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !12, enums: !13, retainedTypes: !13)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !12, enums: !13, retainedTypes: !13, subprograms: !{!1})
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !7, !6}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !DILocation(line: 2, scope: !1)
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "j", line: 2, arg: 0, scope: !1, file: !2, type: !6)
+!9 = !DILocalVariable(name: "j", line: 2, arg: 2, scope: !1, file: !2, type: !6)
!10 = !DILocation(line: 3, scope: !11)
!11 = distinct !DILexicalBlock(line: 2, column: 0, file: !12, scope: !1)
!12 = !DIFile(filename: "testfunc.c", directory: "/tmp")
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index 831221b7f97f..071d708e1fe8 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -1,10 +1,18 @@
-; RUN: opt -mem2reg < %s | llvm-dis | grep ".dbg " | count 7
+; RUN: opt -S -mem2reg <%s | FileCheck %s
declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
declare void @foo(i32, i64, i8*)
-define void @baz(i32 %a) nounwind ssp {
+define void @baz(i32 %a) nounwind ssp !dbg !1 {
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %"alloca point" = bitcast i32 0 to i32{{$}}
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a,{{.*}}, !dbg
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a,{{.*}}, !dbg
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 55,{{.*}}, !dbg
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* bitcast (void (i32)* @baz to i8*),{{.*}}, !dbg
+; CHECK-NEXT: call void @foo({{.*}}, !dbg
+; CHECK-NEXT: br label %return, !dbg
entry:
%x_addr.i = alloca i32 ; <i32*> [#uses=2]
%y_addr.i = alloca i64 ; <i64*> [#uses=2]
@@ -26,30 +34,32 @@ entry:
call void @foo(i32 %1, i64 %2, i8* %3) nounwind, !dbg !18
br label %return, !dbg !19
+; CHECK-LABEL: return:
+; CHECK-NEXT: ret void, !dbg
return: ; preds = %entry
ret void, !dbg !19
}
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!22}
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 8, arg: 0, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "baz", linkageName: "baz", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 8, file: !20, scope: !2, type: !4, function: void (i32)* @baz)
+!0 = !DILocalVariable(name: "a", line: 8, arg: 1, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "baz", linkageName: "baz", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 8, file: !20, scope: !2, type: !4)
!2 = !DIFile(filename: "bar.c", directory: "/tmp/")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21, subprograms: !{!1})
!4 = !DISubroutineType(types: !5)
!5 = !{null, !6}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!7 = !DILocation(line: 8, scope: !1)
!8 = !DILocation(line: 9, scope: !1)
-!9 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 4, arg: 0, scope: !10, file: !2, type: !6)
-!10 = !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 4, file: !20, scope: !2, type: !11)
+!9 = !DILocalVariable(name: "x", line: 4, arg: 1, scope: !10, file: !2, type: !6)
+!10 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 4, file: !20, scope: !2, type: !11)
!11 = !DISubroutineType(types: !12)
!12 = !{null, !6, !13, !14}
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
!14 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !20, scope: !2, baseType: null)
!15 = !DILocation(line: 4, scope: !10, inlinedAt: !8)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 4, arg: 0, scope: !10, file: !2, type: !13)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "z", line: 4, arg: 0, scope: !10, file: !2, type: !14)
+!16 = !DILocalVariable(name: "y", line: 4, arg: 2, scope: !10, file: !2, type: !13)
+!17 = !DILocalVariable(name: "z", line: 4, arg: 3, scope: !10, file: !2, type: !14)
!18 = !DILocation(line: 5, scope: !10, inlinedAt: !8)
!19 = !DILocation(line: 10, scope: !1)
!20 = !DIFile(filename: "bar.c", directory: "/tmp/")
diff --git a/test/Transforms/Mem2Reg/optnone.ll b/test/Transforms/Mem2Reg/optnone.ll
new file mode 100644
index 000000000000..41ee77aff79d
--- /dev/null
+++ b/test/Transforms/Mem2Reg/optnone.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+; This function is optnone, so the allocas should not be eliminated.
+
+; CHECK-LABEL: @testfunc
+; CHECK: alloca
+; CHECK: alloca
+define double @testfunc(i32 %i, double %j) optnone noinline {
+ %I = alloca i32 ; <i32*> [#uses=4]
+ %J = alloca double ; <double*> [#uses=2]
+ store i32 %i, i32* %I
+ store double %j, double* %J
+ %t1 = load i32, i32* %I ; <i32> [#uses=1]
+ %t2 = add i32 %t1, 1 ; <i32> [#uses=1]
+ store i32 %t2, i32* %I
+ %t3 = load i32, i32* %I ; <i32> [#uses=1]
+ %t4 = sitofp i32 %t3 to double ; <double> [#uses=1]
+ %t5 = load double, double* %J ; <double> [#uses=1]
+ %t6 = fmul double %t4, %t5 ; <double> [#uses=1]
+ ret double %t6
+}
diff --git a/test/Transforms/Mem2Reg/pr24179.ll b/test/Transforms/Mem2Reg/pr24179.ll
new file mode 100644
index 000000000000..e4216ce4daa1
--- /dev/null
+++ b/test/Transforms/Mem2Reg/pr24179.ll
@@ -0,0 +1,44 @@
+; RUN: opt -mem2reg < %s -S | FileCheck %s
+
+declare i32 @def(i32)
+declare i1 @use(i32)
+
+; Special case of a single-BB alloca does not apply here since the load
+; is affected by the following store. Expect this case to be identified
+; and a PHI node to be created.
+define void @test1() {
+; CHECK-LABEL: @test1(
+ entry:
+ %t = alloca i32
+ br label %loop
+
+ loop:
+ %v = load i32, i32* %t
+ %c = call i1 @use(i32 %v)
+; CHECK: [[PHI:%.*]] = phi i32 [ undef, %entry ], [ %n, %loop ]
+; CHECK: call i1 @use(i32 [[PHI]])
+ %n = call i32 @def(i32 7)
+ store i32 %n, i32* %t
+ br i1 %c, label %loop, label %exit
+
+ exit:
+ ret void
+}
+
+; Same as above, except that there is no following store. The value loaded from
+; the alloca should just be replaced with undef.
+define void @test2() {
+; CHECK-LABEL: @test2(
+ entry:
+ %t = alloca i32
+ br label %loop
+
+ loop:
+ %v = load i32, i32* %t
+ %c = call i1 @use(i32 %v)
+; CHECK: %c = call i1 @use(i32 undef)
+ br i1 %c, label %loop, label %exit
+
+ exit:
+ ret void
+}
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 731847440d76..6181543cfc63 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -206,5 +206,6 @@ declare void @f1(%struct.big* nocapture sret)
declare void @f2(%struct.big*)
; CHECK: attributes [[NUW]] = { nounwind }
-; CHECK: attributes #1 = { nounwind ssp }
-; CHECK: attributes #2 = { nounwind ssp uwtable }
+; CHECK: attributes #1 = { argmemonly nounwind }
+; CHECK: attributes #2 = { nounwind ssp }
+; CHECK: attributes #3 = { nounwind ssp uwtable }
diff --git a/test/Transforms/MemCpyOpt/nontemporal.ll b/test/Transforms/MemCpyOpt/nontemporal.ll
new file mode 100644
index 000000000000..d9dafcc7b816
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/nontemporal.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Verify that we don't combine nontemporal stores into memset calls.
+
+define void @nontemporal_stores_1(<4 x float>* nocapture %dst) {
+; CHECK-LABEL: @nontemporal_stores_1
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr2, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr3, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr4, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr5, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr6, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr7, align 16, !nontemporal !0
+; CHECK-NEXT: ret void
+entry:
+ store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0
+ %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1
+ store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0
+ %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 2
+ store <4 x float> zeroinitializer, <4 x float>* %ptr2, align 16, !nontemporal !0
+ %ptr3 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 3
+ store <4 x float> zeroinitializer, <4 x float>* %ptr3, align 16, !nontemporal !0
+ %ptr4 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 4
+ store <4 x float> zeroinitializer, <4 x float>* %ptr4, align 16, !nontemporal !0
+ %ptr5 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 5
+ store <4 x float> zeroinitializer, <4 x float>* %ptr5, align 16, !nontemporal !0
+ %ptr6 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 6
+ store <4 x float> zeroinitializer, <4 x float>* %ptr6, align 16, !nontemporal !0
+ %ptr7 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 7
+ store <4 x float> zeroinitializer, <4 x float>* %ptr7, align 16, !nontemporal !0
+ ret void
+}
+
+define void @nontemporal_stores_2(<4 x float>* nocapture %dst) {
+; CHECK-LABEL: @nontemporal_stores_2
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0
+; CHECK: store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0
+; CHECK-NEXT: ret void
+entry:
+ store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0
+ %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1
+ store <4 x float> zeroinitializer, <4 x float>* %ptr1, align 16, !nontemporal !0
+ ret void
+}
+
+!0 = !{i32 1}
diff --git a/test/Transforms/MergeFunc/apply_function_attributes.ll b/test/Transforms/MergeFunc/apply_function_attributes.ll
new file mode 100644
index 000000000000..e9ede4518206
--- /dev/null
+++ b/test/Transforms/MergeFunc/apply_function_attributes.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+%Opaque_type = type opaque
+%S2i = type <{ i64, i64 }>
+%D2i = type <{ i64, i64 }>
+%Di = type <{ i32 }>
+%Si = type <{ i32 }>
+
+define void @B(%Opaque_type* sret %a, %S2i* %b, i32* %xp, i32* %yp) {
+ %x = load i32, i32* %xp
+ %y = load i32, i32* %yp
+ %sum = add i32 %x, %y
+ %sum2 = add i32 %sum, %y
+ %sum3 = add i32 %sum2, %y
+ ret void
+}
+
+define void @C(%Opaque_type* sret %a, %S2i* %b, i32* %xp, i32* %yp) {
+ %x = load i32, i32* %xp
+ %y = load i32, i32* %yp
+ %sum = add i32 %x, %y
+ %sum2 = add i32 %sum, %y
+ %sum3 = add i32 %sum2, %y
+ ret void
+}
+
+define void @A(%Opaque_type* sret %a, %D2i* %b, i32* %xp, i32* %yp) {
+ %x = load i32, i32* %xp
+ %y = load i32, i32* %yp
+ %sum = add i32 %x, %y
+ %sum2 = add i32 %sum, %y
+ %sum3 = add i32 %sum2, %y
+ ret void
+}
+
+; Make sure we transfer the parameter attributes to the call site.
+; CHECK-LABEL: define void @C(%Opaque_type* sret
+; CHECK: tail call void bitcast (void (%Opaque_type*, %D2i*, i32*, i32*)* @A to void (%Opaque_type*, %S2i*, i32*, i32*)*)(%Opaque_type* sret %0, %S2i* %1, i32* %2, i32* %3)
+; CHECK: ret void
+
+
+; Make sure we transfer the parameter attributes to the call site.
+; CHECK-LABEL: define void @B(%Opaque_type* sret
+; CHECK: %5 = bitcast
+; CHECK: tail call void @A(%Opaque_type* sret %0, %D2i* %5, i32* %2, i32* %3)
+; CHECK: ret void
+
diff --git a/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
index b955e3c9582e..806ca3c17a6a 100644
--- a/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
+++ b/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
@@ -63,14 +63,6 @@ lpad:
resume { i8*, i32 } zeroinitializer
}
-define i8 @call_with_same_range() {
-; CHECK-LABEL: @call_with_same_range
-; CHECK: tail call i8 @call_with_range
- bitcast i8 0 to i8
- %out = call i8 @dummy(), !range !0
- ret i8 %out
-}
-
define i8 @invoke_with_same_range() personality i8* undef {
; CHECK-LABEL: @invoke_with_same_range()
; CHECK: tail call i8 @invoke_with_range()
@@ -84,6 +76,16 @@ lpad:
resume { i8*, i32 } zeroinitializer
}
+define i8 @call_with_same_range() {
+; CHECK-LABEL: @call_with_same_range
+; CHECK: tail call i8 @call_with_range
+ bitcast i8 0 to i8
+ %out = call i8 @dummy(), !range !0
+ ret i8 %out
+}
+
+
+
declare i8 @dummy();
declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/MergeFunc/constant-entire-value.ll b/test/Transforms/MergeFunc/constant-entire-value.ll
new file mode 100644
index 000000000000..cb193d06ee41
--- /dev/null
+++ b/test/Transforms/MergeFunc/constant-entire-value.ll
@@ -0,0 +1,42 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; RUN: opt -S -mergefunc < %s | FileCheck -check-prefix=NOPLUS %s
+
+; This makes sure that zeros in constants don't cause problems with string-based
+; memory comparisons.
+define internal i32 @sum(i32 %x, i32 %y) {
+; CHECK-LABEL: @sum
+ %sum = add i32 %x, %y
+ %1 = extractvalue [3 x i32] [ i32 3, i32 0, i32 2 ], 2
+ %sum2 = add i32 %sum, %1
+ %sum3 = add i32 %sum2, %y
+ ret i32 %sum3
+}
+
+define internal i32 @add(i32 %x, i32 %y) {
+; CHECK-LABEL: @add
+ %sum = add i32 %x, %y
+ %1 = extractvalue [3 x i32] [ i32 3, i32 0, i32 1 ], 2
+ %sum2 = add i32 %sum, %1
+ %sum3 = add i32 %sum2, %y
+ ret i32 %sum3
+}
+
+define internal i32 @plus(i32 %x, i32 %y) {
+; NOPLUS-NOT: @plus
+ %sum = add i32 %x, %y
+ %1 = extractvalue [3 x i32] [ i32 3, i32 0, i32 5 ], 2
+ %sum2 = add i32 %sum, %1
+ %sum3 = add i32 %sum2, %y
+ ret i32 %sum3
+}
+
+define internal i32 @next(i32 %x, i32 %y) {
+; CHECK-LABEL: @next
+ %sum = add i32 %x, %y
+ %1 = extractvalue [3 x i32] [ i32 3, i32 0, i32 5 ], 2
+ %sum2 = add i32 %sum, %1
+ %sum3 = add i32 %sum2, %y
+ ret i32 %sum3
+}
+
diff --git a/test/Transforms/MergeFunc/crash2.ll b/test/Transforms/MergeFunc/crash2.ll
new file mode 100644
index 000000000000..4b3a3f911e7e
--- /dev/null
+++ b/test/Transforms/MergeFunc/crash2.ll
@@ -0,0 +1,54 @@
+; RUN: opt %s -mergefunc -globalopt -S -o - | FileCheck %s
+
+; Make sure we don't crash on this example. This test checks that
+; MergeFunctions clears its GlobalNumbers value map. If the map still contained
+; entries while globalopt runs and the MergeFunctions instance were still alive,
+; optimizing @G would trigger an assert: globalopt does an RAUW on @G, which
+; would still be a key in the GlobalNumbers ValueMap, and the ValueHandle
+; callback asserts because the replacement has a different type (AllocaInst)
+; than the map's key type (GlobalValue).
+
+@G = internal global i8** null
+@G2 = internal global i8** null
+
+define i32 @main(i32 %argc, i8** %argv) norecurse {
+; CHECK: alloca
+ store i8** %argv, i8*** @G
+ ret i32 0
+}
+
+define internal i8** @dead1(i64 %p) {
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ %tmp = load i8**, i8*** @G
+ ret i8** %tmp
+}
+
+define internal i8** @dead2(i64 %p) {
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ %tmp = load i8**, i8*** @G2
+ ret i8** %tmp
+}
+
+define void @left(i64 %p) {
+entry-block:
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ call void @right(i64 %p)
+ ret void
+}
+
+define void @right(i64 %p) {
+entry-block:
+ call void @left(i64 %p)
+ call void @left(i64 %p)
+ call void @left(i64 %p)
+ call void @left(i64 %p)
+ ret void
+}
diff --git a/test/Transforms/MergeFunc/gep-base-type.ll b/test/Transforms/MergeFunc/gep-base-type.ll
new file mode 100644
index 000000000000..bfbb247fb3a5
--- /dev/null
+++ b/test/Transforms/MergeFunc/gep-base-type.ll
@@ -0,0 +1,46 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; These should not be merged: the types of the GEP pointer arguments do not have
+; the same stride.
+
+%"struct1" = type <{ i8*, i32, [4 x i8] }>
+%"struct2" = type { i8*, { i64, i64 } }
+
+define internal %struct2* @Ffunc(%struct2* %P, i64 %i) {
+; CHECK-LABEL: @Ffunc(
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+ %2 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+ %3 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+ %4 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+ %5 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+ %6 = getelementptr inbounds %"struct2", %"struct2"* %P, i64 %i
+ ret %struct2* %6
+}
+
+
+define internal %struct1* @Gfunc(%struct1* %P, i64 %i) {
+; CHECK-LABEL: @Gfunc(
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: ret
+ %1 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+ %2 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+ %3 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+ %4 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+ %5 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+ %6 = getelementptr inbounds %"struct1", %"struct1"* %P, i64 %i
+ ret %struct1* %6
+}
+
diff --git a/test/Transforms/MergeFunc/inttoptr-address-space.ll b/test/Transforms/MergeFunc/inttoptr-address-space.ll
index 5f672debd919..86deb2c94953 100644
--- a/test/Transforms/MergeFunc/inttoptr-address-space.ll
+++ b/test/Transforms/MergeFunc/inttoptr-address-space.ll
@@ -21,7 +21,7 @@ define internal i8* @func35(%.qux.2585 addrspace(1)* nocapture %this) align 2 {
bb:
; CHECK-LABEL: @func35(
; CHECK: %[[V2:.+]] = bitcast %.qux.2585 addrspace(1)* %{{.*}} to %.qux.2496 addrspace(1)*
-; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496 addrspace(1)* %[[V2]])
+; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496 addrspace(1)* nocapture %[[V2]])
; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
%tmp = getelementptr inbounds %.qux.2585, %.qux.2585 addrspace(1)* %this, i32 0, i32 2
%tmp1 = load i8*, i8* addrspace(1)* %tmp, align 4
diff --git a/test/Transforms/MergeFunc/inttoptr.ll b/test/Transforms/MergeFunc/inttoptr.ll
index 0abbf6239a34..05ae766a6e37 100644
--- a/test/Transforms/MergeFunc/inttoptr.ll
+++ b/test/Transforms/MergeFunc/inttoptr.ll
@@ -48,7 +48,7 @@ define internal i8* @func35(%.qux.2585* nocapture %this) align 2 {
bb:
; CHECK-LABEL: @func35(
; CHECK: %[[V2:.+]] = bitcast %.qux.2585* %{{.*}} to %.qux.2496*
-; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496* %[[V2]])
+; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496* nocapture %[[V2]])
; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
%tmp = getelementptr inbounds %.qux.2585, %.qux.2585* %this, i32 0, i32 2
%tmp1 = load i8*, i8** %tmp, align 4
diff --git a/test/Transforms/MergeFunc/merge-block-address-other-function.ll b/test/Transforms/MergeFunc/merge-block-address-other-function.ll
new file mode 100644
index 000000000000..ca1a6f2fe2ab
--- /dev/null
+++ b/test/Transforms/MergeFunc/merge-block-address-other-function.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+ %retval = alloca i32, align 4
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4
+ %cmp = icmp eq i32 %0, 1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i32 3, i32* %retval
+ br label %return
+
+if.end:
+ %1 = load i32, i32* %i.addr, align 4
+ %cmp1 = icmp eq i32 %1, 3
+ br i1 %cmp1, label %if.then.2, label %if.end.3
+
+if.then.2:
+ store i32 56, i32* %retval
+ br label %return
+
+if.end.3:
+ store i32 0, i32* %retval
+ br label %return
+
+return:
+ %2 = load i32, i32* %retval
+ ret i32 %2
+}
+
+
+define internal i8* @Afunc(i32* %P) {
+ store i32 1, i32* %P
+ store i32 3, i32* %P
+ ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
+
+define internal i8* @Bfunc(i32* %P) {
+; CHECK-NOT: @Bfunc
+ store i32 1, i32* %P
+ store i32 3, i32* %P
+ ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
diff --git a/test/Transforms/MergeFunc/merge-block-address.ll b/test/Transforms/MergeFunc/merge-block-address.ll
new file mode 100644
index 000000000000..4ce13e5da874
--- /dev/null
+++ b/test/Transforms/MergeFunc/merge-block-address.ll
@@ -0,0 +1,91 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; These two functions are identical. The basic block labels are the same, and
+; induce the same CFG. We are testing that block addresses within different
+; functions are compared by their value, and not based on order. Both functions
+; come from the same C-code, but in the first the two val_0/val_1 basic blocks
+; are in a different order (they were manually switched post-compilation).
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+ %i.addr = alloca i32, align 4
+ %ret = alloca i32, align 4
+ %l = alloca i8*, align 8
+ store i32 %i, i32* %i.addr, align 4
+ store i32 0, i32* %ret, align 4
+ store i8* blockaddress(@_Z1fi, %val_0), i8** %l, align 8
+ %0 = load i32, i32* %i.addr, align 4
+ %and = and i32 %0, 256
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i8* blockaddress(@_Z1fi, %val_1), i8** %l, align 8
+ br label %if.end
+
+if.end:
+ %1 = load i8*, i8** %l, align 8
+ br label %indirectgoto
+
+val_1:
+ store i32 42, i32* %ret, align 4
+ br label %end
+
+val_0:
+ store i32 12, i32* %ret, align 4
+ br label %end
+
+
+end:
+ %2 = load i32, i32* %ret, align 4
+ ret i32 %2
+
+indirectgoto:
+ %indirect.goto.dest = phi i8* [ %1, %if.end ]
+ indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
+define i32 @_Z1gi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1gi
+; CHECK-NEXT: tail call i32 @_Z1fi
+; CHECK-NEXT: ret
+entry:
+ %i.addr = alloca i32, align 4
+ %ret = alloca i32, align 4
+ %l = alloca i8*, align 8
+ store i32 %i, i32* %i.addr, align 4
+ store i32 0, i32* %ret, align 4
+ store i8* blockaddress(@_Z1gi, %val_0), i8** %l, align 8
+ %0 = load i32, i32* %i.addr, align 4
+ %and = and i32 %0, 256
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i8* blockaddress(@_Z1gi, %val_1), i8** %l, align 8
+ br label %if.end
+
+if.end:
+ %1 = load i8*, i8** %l, align 8
+ br label %indirectgoto
+
+val_0:
+ store i32 12, i32* %ret, align 4
+ br label %end
+
+val_1:
+ store i32 42, i32* %ret, align 4
+ br label %end
+
+end:
+ %2 = load i32, i32* %ret, align 4
+ ret i32 %2
+
+indirectgoto:
+ %indirect.goto.dest = phi i8* [ %1, %if.end ]
+ indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
diff --git a/test/Transforms/MergeFunc/merge-const-ptr-and-int.ll b/test/Transforms/MergeFunc/merge-const-ptr-and-int.ll
new file mode 100644
index 000000000000..8c86ab1fbc33
--- /dev/null
+++ b/test/Transforms/MergeFunc/merge-const-ptr-and-int.ll
@@ -0,0 +1,20 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+; RUN: opt -mergefunc -S < %s | FileCheck -check-prefix=MERGE %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Afunc and Bfunc differ only in that one returns i64, the other a pointer.
+; These should be merged.
+define internal i64 @Afunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64 @Afunc
+ store i32 4, i32* %P
+ store i32 6, i32* %Q
+ ret i64 0
+}
+
+define internal i64* @Bfunc(i32* %P, i32* %Q) {
+; MERGE-NOT: @Bfunc
+ store i32 4, i32* %P
+ store i32 6, i32* %Q
+ ret i64* null
+}
+
diff --git a/test/Transforms/MergeFunc/merge-different-vector-types.ll b/test/Transforms/MergeFunc/merge-different-vector-types.ll
new file mode 100644
index 000000000000..7696139b332d
--- /dev/null
+++ b/test/Transforms/MergeFunc/merge-different-vector-types.ll
@@ -0,0 +1,18 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+; RUN: opt -mergefunc -S < %s | FileCheck -check-prefix=MERGE %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Merging should still work even if the values are wrapped in a vector.
+define internal <2 x i64> @Mfunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal <2 x i64> @Mfunc
+ store i32 1, i32* %P
+ store i32 1, i32* %Q
+ ret <2 x i64> <i64 0, i64 0>
+}
+
+define internal <2 x i64*> @Nfunc(i32* %P, i32* %Q) {
+; MERGE-NOT: @Nfunc
+ store i32 1, i32* %P
+ store i32 1, i32* %Q
+ ret <2 x i64*> <i64* null, i64* null>
+}
diff --git a/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll b/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll
new file mode 100644
index 000000000000..3024a9a76a7e
--- /dev/null
+++ b/test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll
@@ -0,0 +1,96 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; There is a slight difference in these two functions, in that the label values
+; are switched. They are thus not mergeable. This tests that block addresses
+; referring to blocks within each respective compared function are correctly
+; ordered.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1fi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1fi
+; CHECK-NEXT: entry:
+; CHECK-NEXT: alloca
+entry:
+ %i.addr = alloca i32, align 4
+ %ret = alloca i32, align 4
+ %l = alloca i8*, align 8
+ store i32 %i, i32* %i.addr, align 4
+ store i32 0, i32* %ret, align 4
+; Right here, this is val_0, and later the if might assign val_1
+ store i8* blockaddress(@_Z1fi, %val_0), i8** %l, align 8
+ %0 = load i32, i32* %i.addr, align 4
+ %and = and i32 %0, 256
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i8* blockaddress(@_Z1fi, %val_1), i8** %l, align 8
+ br label %if.end
+
+if.end:
+ %1 = load i8*, i8** %l, align 8
+ br label %indirectgoto
+
+val_0:
+ store i32 12, i32* %ret, align 4
+ br label %end
+
+val_1:
+ store i32 42, i32* %ret, align 4
+ br label %end
+
+end:
+ %2 = load i32, i32* %ret, align 4
+ ret i32 %2
+
+indirectgoto:
+ %indirect.goto.dest = phi i8* [ %1, %if.end ]
+ indirectbr i8* %indirect.goto.dest, [label %val_0, label %val_1]
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1gi(i32 %i) #0 {
+; CHECK-LABEL: define i32 @_Z1gi
+; CHECK-NEXT: entry:
+; CHECK-NEXT: alloca
+entry:
+ %i.addr = alloca i32, align 4
+ %ret = alloca i32, align 4
+ %l = alloca i8*, align 8
+ store i32 %i, i32* %i.addr, align 4
+ store i32 0, i32* %ret, align 4
+; This time, we store val_1 initially, and later the if might assign val_0
+ store i8* blockaddress(@_Z1gi, %val_1), i8** %l, align 8
+ %0 = load i32, i32* %i.addr, align 4
+ %and = and i32 %0, 256
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i8* blockaddress(@_Z1gi, %val_0), i8** %l, align 8
+ br label %if.end
+
+if.end:
+ %1 = load i8*, i8** %l, align 8
+ br label %indirectgoto
+
+val_0:
+ store i32 12, i32* %ret, align 4
+ br label %end
+
+val_1:
+ store i32 42, i32* %ret, align 4
+ br label %end
+
+end:
+ %2 = load i32, i32* %ret, align 4
+ ret i32 %2
+
+indirectgoto:
+ %indirect.goto.dest = phi i8* [ %1, %if.end ]
+ indirectbr i8* %indirect.goto.dest, [label %val_1, label %val_0]
+}
+
diff --git a/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll b/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll
new file mode 100644
index 000000000000..e1aa30ac55a2
--- /dev/null
+++ b/test/Transforms/MergeFunc/no-merge-block-address-other-function.ll
@@ -0,0 +1,61 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; We should not merge these two functions, because the blocks are different.
+; This tests the handling of block addresses from different functions.
+; ModuleID = '<stdin>'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+define internal i8* @Afunc(i32* %P) {
+; CHECK-LABEL: @Afunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 1, i32* %P
+ store i32 3, i32* %P
+ ret i8* blockaddress(@_Z1fi, %if.then)
+}
+
+define internal i8* @Bfunc(i32* %P) {
+; CHECK-LABEL: @Bfunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 1, i32* %P
+ store i32 3, i32* %P
+ ret i8* blockaddress(@_Z1fi, %if.then.2)
+}
+
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z1fi(i32 %i) #0 {
+entry:
+ %retval = alloca i32, align 4
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4
+ %cmp = icmp eq i32 %0, 1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i32 3, i32* %retval
+ br label %return
+
+if.end:
+ %1 = load i32, i32* %i.addr, align 4
+ %cmp1 = icmp eq i32 %1, 3
+ br i1 %cmp1, label %if.then.2, label %if.end.3
+
+if.then.2:
+ store i32 56, i32* %retval
+ br label %return
+
+if.end.3:
+ store i32 0, i32* %retval
+ br label %return
+
+return:
+ %2 = load i32, i32* %retval
+ ret i32 %2
+}
diff --git a/test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll b/test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll
new file mode 100644
index 000000000000..c0c6dab792d0
--- /dev/null
+++ b/test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll
@@ -0,0 +1,24 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; These should not be merged, as the datalayout says a pointer is 64 bits. No
+; sext/zext is specified, so these functions could lower differently.
+define internal i32 @Ffunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i32 @Ffunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 1, i32* %P
+ store i32 3, i32* %Q
+ ret i32 0
+}
+
+define internal i64* @Gfunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64* @Gfunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 1, i32* %P
+ store i32 3, i32* %Q
+ ret i64* null
+}
diff --git a/test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll b/test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll
new file mode 100644
index 000000000000..6bd656408ac1
--- /dev/null
+++ b/test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll
@@ -0,0 +1,23 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; These should not be merged, as 1 != 0.
+define internal i64 @Ifunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64 @Ifunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 10, i32* %P
+ store i32 10, i32* %Q
+ ret i64 1
+}
+
+define internal i64* @Jfunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64* @Jfunc
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+ store i32 10, i32* %P
+ store i32 10, i32* %Q
+ ret i64* null
+}
diff --git a/test/Transforms/MergeFunc/ranges-multiple.ll b/test/Transforms/MergeFunc/ranges-multiple.ll
new file mode 100644
index 000000000000..bfa775d217a7
--- /dev/null
+++ b/test/Transforms/MergeFunc/ranges-multiple.ll
@@ -0,0 +1,44 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+define i1 @cmp_with_range(i8*, i8*) {
+ %v1 = load i8, i8* %0, !range !0
+ %v2 = load i8, i8* %1, !range !0
+ %out = icmp eq i8 %v1, %v2
+ ret i1 %out
+}
+
+define i1 @cmp_no_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_no_range
+; CHECK-NEXT: %v1 = load i8, i8* %0
+; CHECK-NEXT: %v2 = load i8, i8* %1
+; CHECK-NEXT: %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT: ret i1 %out
+ %v1 = load i8, i8* %0
+ %v2 = load i8, i8* %1
+ %out = icmp eq i8 %v1, %v2
+ ret i1 %out
+}
+
+define i1 @cmp_different_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_different_range
+; CHECK-NEXT: %v1 = load i8, i8* %0, !range !1
+; CHECK-NEXT: %v2 = load i8, i8* %1, !range !1
+; CHECK-NEXT: %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT: ret i1 %out
+ %v1 = load i8, i8* %0, !range !1
+ %v2 = load i8, i8* %1, !range !1
+ %out = icmp eq i8 %v1, %v2
+ ret i1 %out
+}
+
+define i1 @cmp_with_same_range(i8*, i8*) {
+; CHECK-LABEL: @cmp_with_same_range
+; CHECK: tail call i1 @cmp_with_range
+ %v1 = load i8, i8* %0, !range !0
+ %v2 = load i8, i8* %1, !range !0
+ %out = icmp eq i8 %v1, %v2
+ ret i1 %out
+}
+
+; The comparison must check every element of the range, not just the first pair.
+!0 = !{i8 0, i8 2, i8 21, i8 30}
+!1 = !{i8 0, i8 2, i8 21, i8 25}
diff --git a/test/Transforms/MergeFunc/ranges.ll b/test/Transforms/MergeFunc/ranges.ll
index 46a0c76cc7d1..44e71300703b 100644
--- a/test/Transforms/MergeFunc/ranges.ll
+++ b/test/Transforms/MergeFunc/ranges.ll
@@ -8,10 +8,10 @@ define i1 @cmp_with_range(i8*, i8*) {
define i1 @cmp_no_range(i8*, i8*) {
; CHECK-LABEL: @cmp_no_range
-; CHECK-NEXT %v1 = load i8, i8* %0
-; CHECK-NEXT %v2 = load i8, i8* %1
-; CHECK-NEXT %out = icmp eq i8 %v1, %v2
-; CHECK-NEXT ret i1 %out
+; CHECK-NEXT: %v1 = load i8, i8* %0
+; CHECK-NEXT: %v2 = load i8, i8* %1
+; CHECK-NEXT: %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT: ret i1 %out
%v1 = load i8, i8* %0
%v2 = load i8, i8* %1
%out = icmp eq i8 %v1, %v2
diff --git a/test/Transforms/MergeFunc/self-referential-global.ll b/test/Transforms/MergeFunc/self-referential-global.ll
new file mode 100644
index 000000000000..d3d1c62aa7fe
--- /dev/null
+++ b/test/Transforms/MergeFunc/self-referential-global.ll
@@ -0,0 +1,40 @@
+; RUN: opt -mergefunc -disable-output < %s
+
+; A linked list type and simple payload
+%LL = type { %S, %LL* }
+%S = type { void (%S*, i32)* }
+
+; Table refers to itself via GEP
+@Table = internal global [3 x %LL] [%LL { %S { void (%S*, i32)* @B }, %LL* getelementptr inbounds ([3 x %LL], [3 x %LL]* @Table, i32 0, i32 0) }, %LL { %S { void (%S*, i32)* @A }, %LL* getelementptr inbounds ([3 x %LL], [3 x %LL]* @Table, i32 0, i32 0) }, %LL { %S { void (%S*, i32)* @A }, %LL* getelementptr inbounds ([3 x %LL], [3 x %LL]* @Table, i32 0, i32 0) }], align 16
+
+; The body of this is irrelevant; it is long so that mergefunc doesn't skip it as a small function.
+define internal void @A(%S* %self, i32 %a) {
+ %1 = add i32 %a, 32
+ %2 = add i32 %1, 32
+ %3 = add i32 %2, 32
+ %4 = add i32 %3, 32
+ %5 = add i32 %4, 32
+ %6 = add i32 %5, 32
+ %7 = add i32 %6, 32
+ %8 = add i32 %7, 32
+ %9 = add i32 %8, 32
+ %10 = add i32 %9, 32
+ %11 = add i32 %10, 32
+ ret void
+}
+
+define internal void @B(%S* %self, i32 %a) {
+ %1 = add i32 %a, 32
+ %2 = add i32 %1, 32
+ %3 = add i32 %2, 32
+ %4 = add i32 %3, 32
+ %5 = add i32 %4, 32
+ %6 = add i32 %5, 32
+ %7 = add i32 %6, 32
+ %8 = add i32 %7, 32
+ %9 = add i32 %8, 32
+ %10 = add i32 %9, 32
+ %11 = add i32 %10, 32
+ ret void
+}
+
diff --git a/test/Transforms/MergeFunc/undef-different-types.ll b/test/Transforms/MergeFunc/undef-different-types.ll
new file mode 100644
index 000000000000..4694146e55f4
--- /dev/null
+++ b/test/Transforms/MergeFunc/undef-different-types.ll
@@ -0,0 +1,21 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+; RUN: opt -mergefunc -S < %s | FileCheck -check-prefix=MERGE %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Cfunc and Dfunc differ only in that one returns i64, the other a pointer, and
+; both return undef. They should be merged. Note undef cannot be merged with
+; anything else, because this implies the ordering will be inconsistent (i.e.
+; -1 == undef and undef == 1, but -1 < 1, so we must have undef != <any int>).
+define internal i64 @Cfunc(i32* %P, i32* %Q) {
+; CHECK-LABEL: define internal i64 @Cfunc
+ store i32 4, i32* %P
+ store i32 6, i32* %Q
+ ret i64 undef
+}
+
+define internal i64* @Dfunc(i32* %P, i32* %Q) {
+; MERGE-NOT: @Dfunc
+ store i32 4, i32* %P
+ store i32 6, i32* %Q
+ ret i64* undef
+}
diff --git a/test/Transforms/MetaRenamer/metarenamer.ll b/test/Transforms/MetaRenamer/metarenamer.ll
index e126bed9b232..213fbe3bbff7 100644
--- a/test/Transforms/MetaRenamer/metarenamer.ll
+++ b/test/Transforms/MetaRenamer/metarenamer.ll
@@ -12,7 +12,7 @@ target triple = "x86_64-pc-linux-gnu"
@func_5_xxx.static_local_3_xxx = internal global i32 3, align 4
@global_3_xxx = common global i32 0, align 4
-@func_7_xxx = weak alias i32 (...)* @aliased_func_7_xxx
+@func_7_xxx = weak alias i32 (...), i32 (...)* @aliased_func_7_xxx
define i32 @aliased_func_7_xxx(...) {
ret i32 0
diff --git a/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll b/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
index 92fbd20d2982..be219404d5be 100644
--- a/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
+++ b/test/Transforms/NaryReassociate/NVPTX/nary-gep.ll
@@ -123,4 +123,21 @@ define void @reassociate_gep_128(float* %a, i128 %i, i128 %j) {
ret void
}
+%struct.complex = type { float, float }
+
+declare void @bar(%struct.complex*)
+
+define void @different_types(%struct.complex* %input, i64 %i) {
+; CHECK-LABEL: @different_types(
+ %t1 = getelementptr %struct.complex, %struct.complex* %input, i64 %i
+ call void @bar(%struct.complex* %t1)
+ %j = add i64 %i, 5
+ %t2 = getelementptr %struct.complex, %struct.complex* %input, i64 %j, i32 0
+; CHECK: [[cast:[^ ]+]] = bitcast %struct.complex* %t1 to float*
+; CHECK-NEXT: %t2 = getelementptr float, float* [[cast]], i64 10
+; CHECK-NEXT: call void @foo(float* %t2)
+ call void @foo(float* %t2)
+ ret void
+}
+
declare void @llvm.assume(i1)
diff --git a/test/Transforms/NaryReassociate/nary-add.ll b/test/Transforms/NaryReassociate/nary-add.ll
index b3093ff6ecd6..654ef2c49617 100644
--- a/test/Transforms/NaryReassociate/nary-add.ll
+++ b/test/Transforms/NaryReassociate/nary-add.ll
@@ -17,8 +17,9 @@ define void @left_reassociate(i32 %a, i32 %b, i32 %c) {
call void @foo(i32 %1)
%2 = add i32 %b, %c
%3 = add i32 %a, %2
-; CHECK: add i32 [[BASE]], %b
+; CHECK: [[RESULT:%[a-zA-Z0-9]+]] = add i32 [[BASE]], %b
call void @foo(i32 %3)
+; CHECK-NEXT: call void @foo(i32 [[RESULT]])
ret void
}
@@ -35,8 +36,9 @@ define void @right_reassociate(i32 %a, i32 %b, i32 %c) {
call void @foo(i32 %1)
%2 = add i32 %a, %b
%3 = add i32 %2, %c
-; CHECK: add i32 [[BASE]], %b
+; CHECK: [[RESULT:%[a-zA-Z0-9]+]] = add i32 [[BASE]], %b
call void @foo(i32 %3)
+; CHECK-NEXT: call void @foo(i32 [[RESULT]])
ret void
}
diff --git a/test/Transforms/NaryReassociate/nary-mul.ll b/test/Transforms/NaryReassociate/nary-mul.ll
new file mode 100644
index 000000000000..467843c7a39a
--- /dev/null
+++ b/test/Transforms/NaryReassociate/nary-mul.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+
+declare void @foo(i32)
+
+; CHECK-LABEL: @bar(
+define void @bar(i32 %a, i32 %b, i32 %c) {
+ %1 = mul i32 %a, %c
+; CHECK: [[BASE:%[a-zA-Z0-9]+]] = mul i32 %a, %c
+ call void @foo(i32 %1)
+ %2 = mul i32 %a, %b
+ %3 = mul i32 %2, %c
+; CHECK: [[RESULT:%[a-zA-Z0-9]+]] = mul i32 [[BASE]], %b
+ call void @foo(i32 %3)
+; CHECK-NEXT: call void @foo(i32 [[RESULT]])
+ ret void
+}
+
diff --git a/test/Transforms/NaryReassociate/pr24301.ll b/test/Transforms/NaryReassociate/pr24301.ll
new file mode 100644
index 000000000000..898707831f95
--- /dev/null
+++ b/test/Transforms/NaryReassociate/pr24301.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -nary-reassociate -S | FileCheck %s
+
+define i32 @foo(i32 %tmp4) {
+; CHECK-LABEL: @foo(
+entry:
+ %tmp5 = add i32 %tmp4, 8
+ %tmp13 = add i32 %tmp4, -128 ; deleted
+ %tmp14 = add i32 %tmp13, 8 ; => %tmp5 + -128
+ %tmp21 = add i32 119, %tmp4
+ ; do not rewrite %tmp23 against %tmp13 because %tmp13 is already deleted
+ %tmp23 = add i32 %tmp21, -128
+; CHECK: %tmp23 = add i32 %tmp21, -128
+ ret i32 %tmp23
+}
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 9fc5ad1f1008..fc1d087794d6 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -2684,8 +2684,8 @@ define {<2 x float>, <2 x float>} @"\01-[A z]"({}* %self, i8* nocapture %_cmd) n
invoke.cont:
%0 = bitcast {}* %self to i8*
%1 = tail call i8* @objc_retain(i8* %0) nounwind
- tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !2), metadata !DIExpression()), !dbg !DILocation(scope: !2)
- tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !DILocalVariable(tag: DW_TAG_auto_variable, scope: !2), metadata !DIExpression()), !dbg !DILocation(scope: !2)
+ tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !DILocalVariable(scope: !2), metadata !DIExpression()), !dbg !DILocation(scope: !2)
+ tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !DILocalVariable(scope: !2), metadata !DIExpression()), !dbg !DILocation(scope: !2)
%ivar = load i64, i64* @"OBJC_IVAR_$_A.myZ", align 8
%add.ptr = getelementptr i8, i8* %0, i64 %ivar
%tmp1 = bitcast i8* %add.ptr to float*
@@ -3018,7 +3018,7 @@ define void @test67(i8* %x) {
!0 = !{}
!1 = !{i32 1, !"Debug Info Version", i32 3}
-!2 = !DISubprogram()
+!2 = distinct !DISubprogram()
; CHECK: attributes #0 = { nounwind readnone }
; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
index db3a780f91b0..ef8d8e52d1cc 100644
--- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -34,7 +34,7 @@ target triple = "x86_64-apple-macosx10.9.0"
@"\01L_OBJC_SELECTOR_REFERENCES_5" = internal global i8* getelementptr inbounds ([14 x i8], [14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
@llvm.used = appending global [6 x i8*] [i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_" to i8*), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1" to i8*), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @"\01L_OBJC_METH_VAR_NAME_4", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_5" to i8*)], section "llvm.metadata"
-define i32 @main() uwtable ssp personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) {
+define i32 @main() uwtable ssp personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) !dbg !5 {
entry:
%tmp = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", align 8, !dbg !37
%tmp1 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !dbg !37, !invariant.load !38
@@ -84,7 +84,7 @@ declare void @objc_end_catch()
declare void @objc_exception_rethrow()
-define internal fastcc void @ThrowFunc(i8* %obj) uwtable noinline ssp {
+define internal fastcc void @ThrowFunc(i8* %obj) uwtable noinline ssp !dbg !27 {
entry:
%tmp = call i8* @objc_retain(i8* %obj) nounwind
call void @llvm.dbg.value(metadata i8* %obj, i64 0, metadata !32, metadata !DIExpression()), !dbg !55
@@ -113,16 +113,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!33, !34, !35, !36, !61}
-!0 = !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.3 ", isOptimized: true, runtimeVersion: 2, emissionKind: 0, file: !60, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.3 ", isOptimized: true, runtimeVersion: 2, emissionKind: 0, file: !60, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
!1 = !{}
!3 = !{!5, !27}
-!5 = !DISubprogram(name: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 10, file: !60, scope: !6, type: !7, function: i32 ()* @main, variables: !11)
+!5 = distinct !DISubprogram(name: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 10, file: !60, scope: !6, type: !7, variables: !11)
!6 = !DIFile(filename: "test.m", directory: "/Volumes/Files/gottesmmcab/Radar/12906997")
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!11 = !{!12, !21, !25}
-!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "obj", line: 11, scope: !13, file: !6, type: !14)
+!12 = !DILocalVariable(name: "obj", line: 11, scope: !13, file: !6, type: !14)
!13 = distinct !DILexicalBlock(line: 10, column: 0, file: !60, scope: !5)
!14 = !DIDerivedType(tag: DW_TAG_typedef, name: "id", line: 11, file: !60, baseType: !15)
!15 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !60, baseType: !16)
@@ -131,17 +131,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!18 = !DIDerivedType(tag: DW_TAG_member, name: "isa", size: 64, file: !60, scope: !16, baseType: !19)
!19 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, baseType: !20)
!20 = !DICompositeType(tag: DW_TAG_structure_type, name: "objc_class", flags: DIFlagFwdDecl, file: !60)
-!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "ok", line: 13, scope: !22, file: !6, type: !23)
+!21 = !DILocalVariable(name: "ok", line: 13, scope: !22, file: !6, type: !23)
!22 = distinct !DILexicalBlock(line: 12, column: 0, file: !60, scope: !13)
!23 = !DIDerivedType(tag: DW_TAG_typedef, name: "BOOL", line: 62, file: !60, baseType: !24)
!24 = !DIBasicType(tag: DW_TAG_base_type, name: "signed char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "obj2", line: 15, scope: !26, file: !6, type: !14)
+!25 = !DILocalVariable(name: "obj2", line: 15, scope: !26, file: !6, type: !14)
!26 = distinct !DILexicalBlock(line: 14, column: 0, file: !60, scope: !22)
-!27 = !DISubprogram(name: "ThrowFunc", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !60, scope: !6, type: !28, function: void (i8*)* @ThrowFunc, variables: !31)
+!27 = distinct !DISubprogram(name: "ThrowFunc", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !60, scope: !6, type: !28, variables: !31)
!28 = !DISubroutineType(types: !29)
!29 = !{null, !14}
!31 = !{!32}
-!32 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "obj", line: 4, arg: 1, scope: !27, file: !6, type: !14)
+!32 = !DILocalVariable(name: "obj", line: 4, arg: 1, scope: !27, file: !6, type: !14)
!33 = !{i32 1, !"Objective-C Version", i32 2}
!34 = !{i32 1, !"Objective-C Image Info Version", i32 0}
!35 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index 464426abfb06..cf14a1f9a663 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -819,5 +819,7 @@ entry:
ret void
}
-; CHECK: attributes [[NUW]] = { nounwind }
+
+; CHECK: attributes #0 = { argmemonly nounwind }
; CHECK: attributes #1 = { nonlazybind }
+; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/provenance.ll b/test/Transforms/ObjCARC/provenance.ll
index aa5a932a86e9..2587c11d01ed 100644
--- a/test/Transforms/ObjCARC/provenance.ll
+++ b/test/Transforms/ObjCARC/provenance.ll
@@ -1,4 +1,4 @@
-; RUN: opt -disable-output -pa-eval %s 2>&1 | FileCheck %s
+; RUN: opt -disable-output -disable-basicaa -pa-eval %s 2>&1 | FileCheck %s
@"\01l_objc_msgSend_fixup_" = global i8 0
@g1 = global i8 0, section "__OBJC,__message_refs,literal_pointers,no_dead_strip"
diff --git a/test/Transforms/PGOProfile/Inputs/branch1.proftext b/test/Transforms/PGOProfile/Inputs/branch1.proftext
new file mode 100644
index 000000000000..3e28112706f1
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/branch1.proftext
@@ -0,0 +1,6 @@
+test_br_1
+25571299074
+2
+3
+2
+
diff --git a/test/Transforms/PGOProfile/Inputs/branch2.proftext b/test/Transforms/PGOProfile/Inputs/branch2.proftext
new file mode 100644
index 000000000000..7d9bd72b29f2
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/branch2.proftext
@@ -0,0 +1,6 @@
+test_br_2
+29667547796
+2
+1
+1
+
diff --git a/test/Transforms/PGOProfile/Inputs/criticaledge.proftext b/test/Transforms/PGOProfile/Inputs/criticaledge.proftext
new file mode 100644
index 000000000000..f369ba7c3504
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/criticaledge.proftext
@@ -0,0 +1,17 @@
+test_criticalEdge
+82323253069
+8
+2
+1
+2
+2
+0
+1
+2
+1
+
+<stdin>:bar
+12884901887
+1
+7
+
diff --git a/test/Transforms/PGOProfile/Inputs/diag.proftext b/test/Transforms/PGOProfile/Inputs/diag.proftext
new file mode 100644
index 000000000000..aaa137e3a420
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/diag.proftext
@@ -0,0 +1,5 @@
+foo
+12884999999
+1
+1
+
diff --git a/test/Transforms/PGOProfile/Inputs/landingpad.proftext b/test/Transforms/PGOProfile/Inputs/landingpad.proftext
new file mode 100644
index 000000000000..b2bd451611bf
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/landingpad.proftext
@@ -0,0 +1,14 @@
+foo
+59130013419
+4
+3
+1
+2
+0
+
+bar
+24868915205
+2
+1
+2
+
diff --git a/test/Transforms/PGOProfile/Inputs/loop1.proftext b/test/Transforms/PGOProfile/Inputs/loop1.proftext
new file mode 100644
index 000000000000..58c05fbe1676
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/loop1.proftext
@@ -0,0 +1,6 @@
+test_simple_for
+34137660316
+2
+96
+4
+
diff --git a/test/Transforms/PGOProfile/Inputs/loop2.proftext b/test/Transforms/PGOProfile/Inputs/loop2.proftext
new file mode 100644
index 000000000000..1c429ea5d5f4
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/loop2.proftext
@@ -0,0 +1,7 @@
+test_nested_for
+53929068288
+3
+33
+10
+6
+
diff --git a/test/Transforms/PGOProfile/Inputs/switch.proftext b/test/Transforms/PGOProfile/Inputs/switch.proftext
new file mode 100644
index 000000000000..7b406b87ef70
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/switch.proftext
@@ -0,0 +1,8 @@
+test_switch
+46200943743
+4
+0
+5
+2
+3
+
diff --git a/test/Transforms/PGOProfile/branch1.ll b/test/Transforms/PGOProfile/branch1.ll
new file mode 100644
index 000000000000..cc354d3425c6
--- /dev/null
+++ b/test/Transforms/PGOProfile/branch1.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/branch1.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_test_br_1 = private constant [9 x i8] c"test_br_1"
+
+; One-sided conditional: if.then falls through into if.end.
+; Instrumentation-generation run: no counter increment is expected in entry;
+; if.then gets the increment with index operand 1 and if.end the one with
+; index operand 0 (2 counters in total, function hash 25571299074).
+; Profile-use run: the entry branch is expected to carry branch weights 2, 1.
+define i32 @test_br_1(i32 %i) {
+entry:
+; GEN: entry:
+; GEN-NOT: llvm.instrprof.increment
+ %cmp = icmp sgt i32 %i, 0
+ br i1 %cmp, label %if.then, label %if.end
+; USE: br i1 %cmp, label %if.then, label %if.end
+; USE-SAME: !prof ![[BW_ENTRY:[0-9]+]]
+; USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 2, i32 1}
+
+if.then:
+; GEN: if.then:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_test_br_1, i32 0, i32 0), i64 25571299074, i32 2, i32 1)
+ %add = add nsw i32 %i, 2
+ br label %if.end
+
+if.end:
+; GEN: if.end:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_test_br_1, i32 0, i32 0), i64 25571299074, i32 2, i32 0)
+ %retv = phi i32 [ %add, %if.then ], [ %i, %entry ]
+ ret i32 %retv
+}
diff --git a/test/Transforms/PGOProfile/branch2.ll b/test/Transforms/PGOProfile/branch2.ll
new file mode 100644
index 000000000000..1e8bc5ec2a38
--- /dev/null
+++ b/test/Transforms/PGOProfile/branch2.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/branch2.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_test_br_2 = private constant [9 x i8] c"test_br_2"
+
+; Two-sided conditional (if.then / if.else joining in if.end).
+; Instrumentation-generation run: increments are expected in if.then (index
+; operand 0) and if.else (index operand 1), and none in entry or if.end.
+; Profile-use run: the entry branch is expected to carry branch weights 1, 1.
+define i32 @test_br_2(i32 %i) {
+entry:
+; GEN: entry:
+; GEN-NOT: llvm.instrprof.increment
+ %cmp = icmp sgt i32 %i, 0
+ br i1 %cmp, label %if.then, label %if.else
+; USE: br i1 %cmp, label %if.then, label %if.else
+; USE-SAME: !prof ![[BW_ENTRY:[0-9]+]]
+; USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 1, i32 1}
+
+if.then:
+; GEN: if.then:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_test_br_2, i32 0, i32 0), i64 29667547796, i32 2, i32 0)
+ %add = add nsw i32 %i, 2
+ br label %if.end
+
+if.else:
+; GEN: if.else:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_test_br_2, i32 0, i32 0), i64 29667547796, i32 2, i32 1)
+ %sub = sub nsw i32 %i, 2
+ br label %if.end
+
+if.end:
+; GEN: if.end:
+; GEN-NOT: llvm.instrprof.increment
+ %retv = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
+ ret i32 %retv
+; GEN: ret
+}
diff --git a/test/Transforms/PGOProfile/criticaledge.ll b/test/Transforms/PGOProfile/criticaledge.ll
new file mode 100644
index 000000000000..0089bbea1558
--- /dev/null
+++ b/test/Transforms/PGOProfile/criticaledge.ll
@@ -0,0 +1,108 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/criticaledge.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_test_criticalEdge = private constant [17 x i8] c"test_criticalEdge"
+; GEN: @__profn__stdin__bar = private constant [11 x i8] c"<stdin>:bar"
+
+; Switch whose cases 3, 4 and 5 all branch to sw.bb2, which is also reached
+; from nowhere else but has several incoming switch edges, so instrumenting
+; those edges requires splitting them into new blocks
+; (entry.sw.bb2_crit_edge and entry.sw.bb2_crit_edge1).
+; Every directive in this function uses one of the two prefixes that the
+; FileCheck invocations at the top of the file enable, so the expected edge
+; splitting is verified by the instrumentation-generation run rather than
+; being silently skipped.
+; Profile-use run: branch weights are expected on the switch and on the
+; conditional branches in sw.bb2 and sw.default (values listed after @bar).
+define i32 @test_criticalEdge(i32 %i, i32 %j) {
+entry:
+; GEN: entry:
+; GEN-NOT: call void @llvm.instrprof.increment
+ switch i32 %i, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb2
+ i32 4, label %sw.bb2
+; GEN: i32 3, label %entry.sw.bb2_crit_edge
+; GEN: i32 4, label %entry.sw.bb2_crit_edge1
+ i32 5, label %sw.bb2
+ ]
+; USE: ]
+; USE-SAME: !prof ![[BW_SWITCH:[0-9]+]]
+
+; GEN: entry.sw.bb2_crit_edge1:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 1)
+; GEN: br label %sw.bb2
+
+; GEN: entry.sw.bb2_crit_edge:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 0)
+; GEN: br label %sw.bb2
+
+sw.bb:
+; GEN: sw.bb:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 5)
+ %call = call i32 @bar(i32 2)
+ br label %sw.epilog
+
+sw.bb1:
+; GEN: sw.bb1:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 4)
+ %call2 = call i32 @bar(i32 1024)
+ br label %sw.epilog
+
+sw.bb2:
+; GEN: sw.bb2:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %cmp = icmp eq i32 %j, 2
+ br i1 %cmp, label %if.then, label %if.end
+; USE: br i1 %cmp, label %if.then, label %if.end
+; USE-SAME: !prof ![[BW_SW_BB2:[0-9]+]]
+
+if.then:
+; GEN: if.then:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 2)
+ %call4 = call i32 @bar(i32 4)
+ br label %return
+
+if.end:
+; GEN: if.end:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 3)
+ %call5 = call i32 @bar(i32 8)
+ br label %sw.epilog
+
+sw.default:
+; GEN: sw.default:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %call6 = call i32 @bar(i32 32)
+ %cmp7 = icmp sgt i32 %j, 10
+ br i1 %cmp7, label %if.then8, label %if.end9
+; USE: br i1 %cmp7, label %if.then8, label %if.end9
+; USE-SAME: !prof ![[BW_SW_DEFAULT:[0-9]+]]
+
+if.then8:
+; GEN: if.then8:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 7)
+ %add = add nsw i32 %call6, 10
+ br label %if.end9
+
+if.end9:
+; GEN: if.end9:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @__profn_test_criticalEdge, i32 0, i32 0), i64 82323253069, i32 8, i32 6)
+ %res.0 = phi i32 [ %add, %if.then8 ], [ %call6, %sw.default ]
+ br label %sw.epilog
+
+sw.epilog:
+; GEN: sw.epilog:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %res.1 = phi i32 [ %res.0, %if.end9 ], [ %call5, %if.end ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
+ br label %return
+
+return:
+; GEN: return:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %retval = phi i32 [ %res.1, %sw.epilog ], [ %call4, %if.then ]
+ ret i32 %retval
+}
+
+; Internal-linkage helper that returns its argument. The expected profile
+; name for it is "<stdin>:bar" (see the @__profn__stdin__bar directive at the
+; top of the file), with a single counter and hash 12884901887.
+define internal i32 @bar(i32 %i) {
+entry:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn__stdin__bar, i32 0, i32 0), i64 12884901887, i32 1, i32 0)
+ ret i32 %i
+}
+
+; USE: ![[BW_SWITCH]] = !{!"branch_weights", i32 2, i32 1, i32 0, i32 2, i32 1, i32 1}
+; USE: ![[BW_SW_BB2]] = !{!"branch_weights", i32 2, i32 2}
+; USE: ![[BW_SW_DEFAULT]] = !{!"branch_weights", i32 1, i32 1}
diff --git a/test/Transforms/PGOProfile/diag_mismatch.ll b/test/Transforms/PGOProfile/diag_mismatch.ll
new file mode 100644
index 000000000000..a2d0b20620f0
--- /dev/null
+++ b/test/Transforms/PGOProfile/diag_mismatch.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-profdata merge %S/Inputs/diag.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s
+
+; CHECK: Function control flow change detected (hash mismatch) foo
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Single-block function named foo. The merged profile input contains a
+; record for foo with hash 12884999999, and the directive above expects opt
+; to report a hash mismatch for this function.
+define i32 @foo() {
+entry:
+ ret i32 0
+}
diff --git a/test/Transforms/PGOProfile/diag_no_funcprofdata.ll b/test/Transforms/PGOProfile/diag_no_funcprofdata.ll
new file mode 100644
index 000000000000..2e5ec0444b42
--- /dev/null
+++ b/test/Transforms/PGOProfile/diag_no_funcprofdata.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-profdata merge %S/Inputs/diag.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s
+
+; CHECK: No profile data available for function bar
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Single-block function named bar. The profile input used by this test
+; (Inputs/diag.proftext) is expected to have no record for bar, which is what
+; triggers the "No profile data available" diagnostic matched above.
+define i32 @bar() {
+entry:
+ ret i32 0
+}
diff --git a/test/Transforms/PGOProfile/diag_no_profile.ll b/test/Transforms/PGOProfile/diag_no_profile.ll
new file mode 100644
index 000000000000..ce7b59b8f69d
--- /dev/null
+++ b/test/Transforms/PGOProfile/diag_no_profile.ll
@@ -0,0 +1,9 @@
+; RUN: not opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Trivial function; the test only requires that opt exits with a failure
+; status (the command is wrapped in "not").
+; NOTE(review): presumably the failure comes from %t.profdata never being
+; created in this test -- confirm; the diagnostic text itself is not matched.
+define i32 @foo() {
+entry:
+ ret i32 0
+}
diff --git a/test/Transforms/PGOProfile/landingpad.ll b/test/Transforms/PGOProfile/landingpad.ll
new file mode 100644
index 000000000000..33fe62fbae03
--- /dev/null
+++ b/test/Transforms/PGOProfile/landingpad.ll
@@ -0,0 +1,124 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/landingpad.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@val = global i32 0, align 4
+@_ZTIi = external constant i8*
+; GEN: @__profn_bar = private constant [3 x i8] c"bar"
+; GEN: @__profn_foo = private constant [3 x i8] c"foo"
+
+; Throws %i as a C++ int exception (via __cxa_allocate_exception and
+; __cxa_throw) when %i is not a multiple of 3; otherwise returns 0.
+; Instrumentation-generation run: no increment is expected in entry; if.then
+; gets index operand 1 and if.end index operand 0.
+; Profile-use run: the entry branch is expected to carry branch weights
+; (values are listed at the end of the file).
+define i32 @bar(i32 %i) {
+entry:
+; GEN: entry:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %rem = srem i32 %i, 3
+ %tobool = icmp ne i32 %rem, 0
+ br i1 %tobool, label %if.then, label %if.end
+; USE: br i1 %tobool, label %if.then, label %if.end
+; USE-SAME: !prof ![[BW_BAR_ENTRY:[0-9]+]]
+
+if.then:
+; GEN: if.then:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_bar, i32 0, i32 0), i64 24868915205, i32 2, i32 1)
+ %exception = call i8* @__cxa_allocate_exception(i64 4)
+ %tmp = bitcast i8* %exception to i32*
+ store i32 %i, i32* %tmp, align 16
+ call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
+ unreachable
+
+if.end:
+; GEN: if.end:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_bar, i32 0, i32 0), i64 24868915205, i32 2, i32 0)
+ ret i32 0
+}
+
+declare i8* @__cxa_allocate_exception(i64)
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+; Calls @bar(%i * 7) through an invoke when %i is not a multiple of 2. A
+; caught int exception is subtracted from @val; on the non-throwing path %i
+; is added to @val. Both paths return -1 from try.cont; an exception whose
+; type id does not match is resumed from eh.resume.
+; Instrumentation-generation run: increments are expected in if.end (index
+; operand 0), invoke.cont (1), catch (2) and eh.resume (3), and none in entry,
+; if.then, lpad, catch.dispatch or try.cont.
+; Profile-use run: branch weights are expected on the entry branch and on
+; the catch.dispatch branch (values are listed at the end of the file).
+define i32 @foo(i32 %i) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+; GEN: entry:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %rem = srem i32 %i, 2
+ %tobool = icmp ne i32 %rem, 0
+ br i1 %tobool, label %if.then, label %if.end
+; USE: br i1 %tobool, label %if.then, label %if.end
+; USE-SAME: !prof ![[BW_FOO_ENTRY:[0-9]+]]
+
+if.then:
+; GEN: if.then:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %mul = mul nsw i32 %i, 7
+ %call = invoke i32 @bar(i32 %mul)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+; GEN: invoke.cont:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 59130013419, i32 4, i32 1)
+ br label %if.end
+
+lpad:
+; GEN: lpad:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %tmp = landingpad { i8*, i32 }
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %tmp1 = extractvalue { i8*, i32 } %tmp, 0
+ %tmp2 = extractvalue { i8*, i32 } %tmp, 1
+ br label %catch.dispatch
+
+catch.dispatch:
+; GEN: catch.dispatch:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %tmp3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+ %matches = icmp eq i32 %tmp2, %tmp3
+ br i1 %matches, label %catch, label %eh.resume
+; USE: br i1 %matches, label %catch, label %eh.resume
+; USE-SAME: !prof ![[BW_CATCH_DISPATCH:[0-9]+]]
+
+catch:
+; GEN: catch:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 59130013419, i32 4, i32 2)
+ %tmp4 = call i8* @__cxa_begin_catch(i8* %tmp1)
+ %tmp5 = bitcast i8* %tmp4 to i32*
+ %tmp6 = load i32, i32* %tmp5, align 4
+ %tmp7 = load i32, i32* @val, align 4
+ %sub = sub nsw i32 %tmp7, %tmp6
+ store i32 %sub, i32* @val, align 4
+ call void @__cxa_end_catch()
+ br label %try.cont
+
+try.cont:
+; GEN: try.cont:
+; GEN-NOT: call void @llvm.instrprof.increment
+ ret i32 -1
+
+if.end:
+; GEN: if.end:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 59130013419, i32 4, i32 0)
+ %tmp8 = load i32, i32* @val, align 4
+ %add = add nsw i32 %tmp8, %i
+ store i32 %add, i32* @val, align 4
+ br label %try.cont
+
+eh.resume:
+; GEN: eh.resume:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 59130013419, i32 4, i32 3)
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %tmp1, 0
+ %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %tmp2, 1
+ resume { i8*, i32 } %lpad.val3
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+; USE: ![[BW_BAR_ENTRY]] = !{!"branch_weights", i32 2, i32 1}
+; USE: ![[BW_FOO_ENTRY]] = !{!"branch_weights", i32 3, i32 2}
+; USE: ![[BW_CATCH_DISPATCH]] = !{!"branch_weights", i32 2, i32 0}
diff --git a/test/Transforms/PGOProfile/loop1.ll b/test/Transforms/PGOProfile/loop1.ll
new file mode 100644
index 000000000000..aa5aa86b1e54
--- /dev/null
+++ b/test/Transforms/PGOProfile/loop1.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/loop1.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_test_simple_for = private constant [15 x i8] c"test_simple_for"
+
+; Single counted loop: %sum starts at 1 and is incremented once per
+; iteration while %i < %n; the function returns %sum.
+; Instrumentation-generation run: increments are expected in for.inc (index
+; operand 0) and for.end (index operand 1), and none in entry, for.cond or
+; for.body.
+; Profile-use run: the for.cond branch is expected to carry weights 96, 4.
+define i32 @test_simple_for(i32 %n) {
+entry:
+; GEN: entry:
+; GEN-NOT: call void @llvm.instrprof.increment
+ br label %for.cond
+
+for.cond:
+; GEN: for.cond:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %i = phi i32 [ 0, %entry ], [ %inc1, %for.inc ]
+ %sum = phi i32 [ 1, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp slt i32 %i, %n
+ br i1 %cmp, label %for.body, label %for.end
+; USE: br i1 %cmp, label %for.body, label %for.end
+; USE-SAME: !prof ![[BW_FOR_COND:[0-9]+]]
+; USE: ![[BW_FOR_COND]] = !{!"branch_weights", i32 96, i32 4}
+
+for.body:
+; GEN: for.body:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %inc = add nsw i32 %sum, 1
+ br label %for.inc
+
+for.inc:
+; GEN: for.inc:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_simple_for, i32 0, i32 0), i64 34137660316, i32 2, i32 0)
+ %inc1 = add nsw i32 %i, 1
+ br label %for.cond
+
+for.end:
+; GEN: for.end:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_simple_for, i32 0, i32 0), i64 34137660316, i32 2, i32 1)
+ ret i32 %sum
+}
diff --git a/test/Transforms/PGOProfile/loop2.ll b/test/Transforms/PGOProfile/loop2.ll
new file mode 100644
index 000000000000..ec3e16d461bc
--- /dev/null
+++ b/test/Transforms/PGOProfile/loop2.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/loop2.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_test_nested_for = private constant [15 x i8] c"test_nested_for"
+
+; Two nested counted loops: the outer loop runs while %i.0 < %r, the inner
+; loop while %j.0 < %s, and the running sum is incremented once per inner
+; iteration. The function returns the sum seen at the outer loop header.
+; Instrumentation-generation run: increments are expected in for.inc.inner
+; (index operand 0), for.inc.outer (1) and for.end.outer (2); the loop
+; headers and bodies are expected to stay uninstrumented.
+; Profile-use run: branch weights are expected on both loop-header branches
+; (values are listed after the function).
+define i32 @test_nested_for(i32 %r, i32 %s) {
+entry:
+; GEN: entry:
+; GEN-NOT: call void @llvm.instrprof.increment
+ br label %for.cond.outer
+
+for.cond.outer:
+; GEN: for.cond.outer:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %i.0 = phi i32 [ 0, %entry ], [ %inc.2, %for.inc.outer ]
+ %sum.0 = phi i32 [ 1, %entry ], [ %sum.1, %for.inc.outer ]
+ %cmp = icmp slt i32 %i.0, %r
+ br i1 %cmp, label %for.body.outer, label %for.end.outer
+; USE: br i1 %cmp, label %for.body.outer, label %for.end.outer
+; USE-SAME: !prof ![[BW_FOR_COND_OUTER:[0-9]+]]
+
+for.body.outer:
+; GEN: for.body.outer:
+; GEN-NOT: call void @llvm.instrprof.increment
+ br label %for.cond.inner
+
+for.cond.inner:
+; GEN: for.cond.inner:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %j.0 = phi i32 [ 0, %for.body.outer ], [ %inc.1, %for.inc.inner ]
+ %sum.1 = phi i32 [ %sum.0, %for.body.outer ], [ %inc, %for.inc.inner ]
+ %cmp2 = icmp slt i32 %j.0, %s
+ br i1 %cmp2, label %for.body.inner, label %for.end.inner
+; USE: br i1 %cmp2, label %for.body.inner, label %for.end.inner
+; USE-SAME: !prof ![[BW_FOR_COND_INNER:[0-9]+]]
+
+for.body.inner:
+; GEN: for.body.inner:
+; GEN-NOT: call void @llvm.instrprof.increment
+ %inc = add nsw i32 %sum.1, 1
+ br label %for.inc.inner
+
+for.inc.inner:
+; GEN: for.inc.inner:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_nested_for, i32 0, i32 0), i64 53929068288, i32 3, i32 0)
+ %inc.1 = add nsw i32 %j.0, 1
+ br label %for.cond.inner
+
+for.end.inner:
+; GEN: for.end.inner:
+ br label %for.inc.outer
+
+for.inc.outer:
+; GEN: for.inc.outer:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_nested_for, i32 0, i32 0), i64 53929068288, i32 3, i32 1)
+ %inc.2 = add nsw i32 %i.0, 1
+ br label %for.cond.outer
+
+for.end.outer:
+; GEN: for.end.outer:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__profn_test_nested_for, i32 0, i32 0), i64 53929068288, i32 3, i32 2)
+ ret i32 %sum.0
+}
+
+; USE-DAG: ![[BW_FOR_COND_OUTER]] = !{!"branch_weights", i32 10, i32 6}
+; USE-DAG: ![[BW_FOR_COND_INNER]] = !{!"branch_weights", i32 33, i32 10}
+
diff --git a/test/Transforms/PGOProfile/single_bb.ll b/test/Transforms/PGOProfile/single_bb.ll
new file mode 100644
index 000000000000..f904d09b8e7a
--- /dev/null
+++ b/test/Transforms/PGOProfile/single_bb.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_single_bb = private constant [9 x i8] c"single_bb"
+
+; Function with a single basic block: exactly one counter (index operand 0)
+; is expected, placed in entry, with function hash 12884901887.
+define i32 @single_bb() {
+entry:
+; GEN: entry:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @__profn_single_bb, i32 0, i32 0), i64 12884901887, i32 1, i32 0)
+ ret i32 0
+}
diff --git a/test/Transforms/PGOProfile/switch.ll b/test/Transforms/PGOProfile/switch.ll
new file mode 100644
index 000000000000..3177dc0bd040
--- /dev/null
+++ b/test/Transforms/PGOProfile/switch.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: llvm-profdata merge %S/Inputs/switch.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_test_switch = private constant [11 x i8] c"test_switch"
+
+; Three-case switch plus default, with every arm branching to sw.epilog.
+; Instrumentation-generation run: one increment is expected per arm --
+; sw.bb1 (index operand 0), sw.bb2 (1), sw.bb (2), sw.default (3) -- and
+; none in entry or sw.epilog.
+; Profile-use run: the switch is expected to carry branch weights
+; 3, 2, 0, 5.
+define void @test_switch(i32 %i) {
+entry:
+; GEN: entry:
+; GEN-NOT: call void @llvm.instrprof.increment
+ switch i32 %i, label %sw.default [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb2
+ ]
+; USE: ]
+; USE-SAME: !prof ![[BW_SWITCH:[0-9]+]]
+; USE: ![[BW_SWITCH]] = !{!"branch_weights", i32 3, i32 2, i32 0, i32 5}
+
+sw.bb:
+; GEN: sw.bb:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn_test_switch, i32 0, i32 0), i64 46200943743, i32 4, i32 2)
+ br label %sw.epilog
+
+sw.bb1:
+; GEN: sw.bb1:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn_test_switch, i32 0, i32 0), i64 46200943743, i32 4, i32 0)
+ br label %sw.epilog
+
+sw.bb2:
+; GEN: sw.bb2:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn_test_switch, i32 0, i32 0), i64 46200943743, i32 4, i32 1)
+ br label %sw.epilog
+
+sw.default:
+; GEN: sw.default:
+; GEN: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @__profn_test_switch, i32 0, i32 0), i64 46200943743, i32 4, i32 3)
+ br label %sw.epilog
+
+sw.epilog:
+; GEN: sw.epilog:
+; GEN-NOT: call void @llvm.instrprof.increment
+ ret void
+; GEN: ret void
+}
diff --git a/test/Transforms/PlaceSafepoints/basic.ll b/test/Transforms/PlaceSafepoints/basic.ll
index 32aa4da68f21..8cdbc217b849 100644
--- a/test/Transforms/PlaceSafepoints/basic.ll
+++ b/test/Transforms/PlaceSafepoints/basic.ll
@@ -74,7 +74,7 @@ define i1 @test_call_with_result() gc "statepoint-example" {
; CHECK: gc.statepoint.p0f_isVoidf
; CHECK: gc.statepoint.p0f_i1i1f
; CHECK: (i64 2882400000, i32 0, i1 (i1)* @i1_return_i1, i32 1, i32 0, i1 false, i32 0, i32 0)
-; CHECK: %call1.2 = call i1 @llvm.experimental.gc.result.i1
+; CHECK: %call12 = call i1 @llvm.experimental.gc.result.i1
entry:
%call1 = tail call i1 (i1) @i1_return_i1(i1 false)
ret i1 %call1
diff --git a/test/Transforms/PlaceSafepoints/call_gc_result.ll b/test/Transforms/PlaceSafepoints/call_gc_result.ll
index d78a0989c3b1..f2929bfd58ab 100644
--- a/test/Transforms/PlaceSafepoints/call_gc_result.ll
+++ b/test/Transforms/PlaceSafepoints/call_gc_result.ll
@@ -21,8 +21,8 @@ branch2:
merge:
;; CHECK: %phi = phi i32 [ %a, %branch2 ], [ %b, %branch1 ]
-;; CHECK-NEXT: %safepoint_token.1 = call i32 (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @foo, i32 0, i32 0, i32 0, i32 0)
-;; CHECK-NEXT: %ret.2 = call i32 @llvm.experimental.gc.result.i32(i32 %safepoint_token.1)
+;; CHECK-NEXT: %safepoint_token1 = call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @foo, i32 0, i32 0, i32 0, i32 0)
+;; CHECK-NEXT: %ret2 = call i32 @llvm.experimental.gc.result.i32(token %safepoint_token1)
%phi = phi i32 [ %a, %branch2 ], [ %b, %branch1 ]
%ret = call i32 @foo()
ret i32 %ret
diff --git a/test/Transforms/PlaceSafepoints/finite-loops.ll b/test/Transforms/PlaceSafepoints/finite-loops.ll
index 3cc7158afcfe..b98073d6a6e6 100644
--- a/test/Transforms/PlaceSafepoints/finite-loops.ll
+++ b/test/Transforms/PlaceSafepoints/finite-loops.ll
@@ -1,6 +1,7 @@
; Tests to ensure that we are not placing backedge safepoints in
; loops which are clearly finite.
-;; RUN: opt %s -place-safepoints -S | FileCheck %s
+;; RUN: opt %s -place-safepoints -spp-counted-loop-trip-width=32 -S | FileCheck %s
+;; RUN: opt %s -place-safepoints -spp-counted-loop-trip-width=64 -S | FileCheck %s -check-prefix=COUNTED-64
; A simple counted loop with trivially known range
@@ -10,6 +11,7 @@ define void @test1(i32) gc "statepoint-example" {
; CHECK: statepoint
; CHECK-LABEL: loop
; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
entry:
br label %loop
@@ -31,6 +33,7 @@ define void @test2(i32) gc "statepoint-example" {
; CHECK: statepoint
; CHECK-LABEL: loop
; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
entry:
br label %loop
@@ -55,6 +58,7 @@ define void @test3(i8 %upper) gc "statepoint-example" {
; CHECK: statepoint
; CHECK-LABEL: loop
; CHECK-NOT: statepoint
+; CHECK-LABEL: exit
entry:
br label %loop
@@ -69,6 +73,65 @@ exit:
ret void
}
+; The loop bound is a 64-bit value: the exit test is a signed less-than on
+; an i64 counter. The run with -spp-counted-loop-trip-width=32 expects a
+; statepoint inside the loop; the run with a trip width of 64 expects the
+; loop to contain none.
+define void @test4(i64 %upper) gc "statepoint-example" {
+; CHECK-LABEL: test4
+; CHECK-LABEL: entry
+; CHECK: statepoint
+; CHECK-LABEL: loop
+; CHECK: statepoint
+; CHECK-LABEL: exit
+
+; COUNTED-64-LABEL: test4
+; COUNTED-64-LABEL: entry
+; COUNTED-64: statepoint
+; COUNTED-64-LABEL: loop
+; COUNTED-64-NOT: statepoint
+; COUNTED-64-LABEL: exit
+
+entry:
+ br label %loop
+
+loop:
+ %counter = phi i64 [ 0 , %entry ], [ %counter.inc , %loop ]
+ %counter.inc = add i64 %counter, 1
+ %counter.cmp = icmp slt i64 %counter.inc, %upper
+ br i1 %counter.cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; This loop can run infinitely (for %upper == INT64_MAX, since the exit test
+; is a signed less-than-or-equal) so it needs a safepoint. Both runs -- trip
+; width 32 and trip width 64 -- expect a statepoint inside the loop.
+define void @test5(i64 %upper) gc "statepoint-example" {
+; CHECK-LABEL: test5
+; CHECK-LABEL: entry
+; CHECK: statepoint
+; CHECK-LABEL: loop
+; CHECK: statepoint
+; CHECK-LABEL: exit
+
+; COUNTED-64-LABEL: test5
+; COUNTED-64-LABEL: entry
+; COUNTED-64: statepoint
+; COUNTED-64-LABEL: loop
+; COUNTED-64: statepoint
+; COUNTED-64-LABEL: exit
+
+entry:
+ br label %loop
+
+loop:
+ %counter = phi i64 [ 0 , %entry ], [ %counter.inc , %loop ]
+ %counter.inc = add i64 %counter, 1
+ %counter.cmp = icmp sle i64 %counter.inc, %upper
+ br i1 %counter.cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
; This function is inlined when inserting a poll.
declare void @do_safepoint()
diff --git a/test/Transforms/PlaceSafepoints/patchable-statepoints.ll b/test/Transforms/PlaceSafepoints/patchable-statepoints.ll
index 9387f42bf0ab..2303ac7ef515 100644
--- a/test/Transforms/PlaceSafepoints/patchable-statepoints.ll
+++ b/test/Transforms/PlaceSafepoints/patchable-statepoints.ll
@@ -7,7 +7,7 @@ define void @test_id() gc "statepoint-example" personality i32 ()* @personality_
; CHECK-LABEL: @test_id(
entry:
; CHECK-LABEL: entry:
-; CHECK: invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 100, i32 0, void ()* @f
+; CHECK: invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 100, i32 0, void ()* @f
invoke void @f() "statepoint-id"="100" to label %normal_return unwind label %exceptional_return
normal_return:
@@ -22,7 +22,7 @@ define void @test_num_patch_bytes() gc "statepoint-example" personality i32 ()*
; CHECK-LABEL: @test_num_patch_bytes(
entry:
; CHECK-LABEL: entry:
-; CHECK: invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 99, void ()* null,
+; CHECK: invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 99, void ()* @f,
invoke void @f() "statepoint-num-patch-bytes"="99" to label %normal_return unwind label %exceptional_return
normal_return:
diff --git a/test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll b/test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll
index 6048f63c7f7b..eaefefa7ad1d 100644
--- a/test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll
+++ b/test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll
@@ -6,7 +6,7 @@
define i64 addrspace(1)* @test_invoke_format(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @personality {
; CHECK-LABEL: @test_invoke_format(
; CHECK-LABEL: entry:
-; CHECK: invoke coldcc i32 (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
+; CHECK: invoke coldcc token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
entry:
%ret_val = invoke coldcc i64 addrspace(1)* @callee(i64 addrspace(1)* %obj)
to label %normal_return unwind label %exceptional_return
@@ -23,7 +23,7 @@ exceptional_return:
define i64 addrspace(1)* @test_call_format(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" {
; CHECK-LABEL: @test_call_format(
; CHECK-LABEL: entry:
-; CHECK: call coldcc i32 (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
+; CHECK: call coldcc token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
entry:
%ret_val = call coldcc i64 addrspace(1)* @callee(i64 addrspace(1)* %obj)
ret i64 addrspace(1)* %ret_val
diff --git a/test/Transforms/PlaceSafepoints/statepoint-format.ll b/test/Transforms/PlaceSafepoints/statepoint-format.ll
index 496091f552d1..c3712a3ace00 100644
--- a/test/Transforms/PlaceSafepoints/statepoint-format.ll
+++ b/test/Transforms/PlaceSafepoints/statepoint-format.ll
@@ -6,7 +6,7 @@
define i64 addrspace(1)* @test_invoke_format(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @personality {
; CHECK-LABEL: @test_invoke_format(
; CHECK-LABEL: entry:
-; CHECK: invoke i32 (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
+; CHECK: invoke token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
entry:
%ret_val = invoke i64 addrspace(1)* @callee(i64 addrspace(1)* %obj)
to label %normal_return unwind label %exceptional_return
@@ -23,7 +23,7 @@ exceptional_return:
define i64 addrspace(1)* @test_call_format(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" {
; CHECK-LABEL: @test_call_format(
; CHECK-LABEL: entry:
-; CHECK: call i32 (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
+; CHECK: call token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
entry:
%ret_val = call i64 addrspace(1)* @callee(i64 addrspace(1)* %obj)
ret i64 addrspace(1)* %ret_val
diff --git a/test/Transforms/PruneEH/operand-bundles.ll b/test/Transforms/PruneEH/operand-bundles.ll
new file mode 100644
index 000000000000..efe8f62a8fb2
--- /dev/null
+++ b/test/Transforms/PruneEH/operand-bundles.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -prune-eh -S | FileCheck %s
+
+declare void @nounwind() nounwind
+
+; Internal function whose only call is to @nounwind, which is declared with
+; the nounwind attribute.
+define internal void @foo() {
+ call void @nounwind()
+ ret void
+}
+
+; Invokes @foo with an operand bundle "foo"(i32 0, i8 1). The directives
+; expect that after -prune-eh no invoke remains and that the resulting plain
+; call still carries the same operand bundle.
+define i32 @caller() personality i32 (...)* @__gxx_personality_v0 {
+; CHECK-LABEL: @caller(
+; CHECK-NOT: invoke
+; CHECK: call void @foo() [ "foo"(i32 0, i8 1) ]
+ invoke void @foo() [ "foo"(i32 0, i8 1) ]
+ to label %Normal unwind label %Except
+
+Normal: ; preds = %0
+ ret i32 0
+
+Except: ; preds = %0
+ landingpad { i8*, i32 }
+ catch i8* null
+ ret i32 1
+}
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/Reassociate/fast-ReassociateVector.ll b/test/Transforms/Reassociate/fast-ReassociateVector.ll
index 9fbb5ccfe9a2..fb76b9d990b0 100644
--- a/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -16,9 +16,9 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: @test2
-; CHECK-NEXT: fadd fast <2 x float> %c, %b
-; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2
-; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast <2 x float> %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast <2 x float> %a, %a
+; CHECK-NEXT: fmul fast <2 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT: ret <2 x float>
%t0 = fmul fast <2 x float> %a, %b
@@ -133,8 +133,8 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
; Check x*y+y*x -> x*y*2.
define <2 x double> @test11(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @test11
-; CHECK-NEXT: %factor = fmul fast <2 x double> %y, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %x
+; CHECK-NEXT: %factor = fmul fast <2 x double> %x, <double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %y
; CHECK-NEXT: ret <2 x double> %tmp1
%1 = fmul fast <2 x double> %x, %y
diff --git a/test/Transforms/Reassociate/fast-basictest.ll b/test/Transforms/Reassociate/fast-basictest.ll
index 64b74e3e8c16..c8a2bd9c1935 100644
--- a/test/Transforms/Reassociate/fast-basictest.ll
+++ b/test/Transforms/Reassociate/fast-basictest.ll
@@ -108,7 +108,7 @@ define float @test7(float %A, float %B, float %C) {
; CHECK-LABEL: @test7
; CHECK-NEXT: fadd fast float %C, %B
; CHECK-NEXT: fmul fast float %A, %A
-; CHECK-NEXT: fmul fast float %1, %tmp2
+; CHECK-NEXT: fmul fast float %tmp3, %tmp2
; CHECK-NEXT: ret float
%aa = fmul fast float %A, %A
diff --git a/test/Transforms/Reassociate/fast-fp-commute.ll b/test/Transforms/Reassociate/fast-fp-commute.ll
index ad89607a21e4..6565bbb3d201 100644
--- a/test/Transforms/Reassociate/fast-fp-commute.ll
+++ b/test/Transforms/Reassociate/fast-fp-commute.ll
@@ -33,8 +33,8 @@ define float @test2(float %x, float %y) {
define float @test3(float %x, float %y) {
; CHECK-LABEL: test3
-; CHECK-NEXT: %factor = fmul fast float %y, 2.000000e+00
-; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x
+; CHECK-NEXT: %factor = fmul fast float %x, 2.000000e+00
+; CHECK-NEXT: %tmp1 = fmul fast float %factor, %y
; CHECK-NEXT: ret float %tmp1
%1 = fmul fast float %x, %y
diff --git a/test/Transforms/Reassociate/fast-multistep.ll b/test/Transforms/Reassociate/fast-multistep.ll
index 45e15c7f3539..aea997cdcbda 100644
--- a/test/Transforms/Reassociate/fast-multistep.ll
+++ b/test/Transforms/Reassociate/fast-multistep.ll
@@ -3,9 +3,9 @@
define float @fmultistep1(float %a, float %b, float %c) {
; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
; CHECK-LABEL: @fmultistep1
-; CHECK-NEXT: fadd fast float %c, %b
-; CHECK-NEXT: fmul fast float %a, %tmp2
-; CHECK-NEXT: fmul fast float %tmp3, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = fadd fast float %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = fmul fast float %a, %a
+; CHECK-NEXT: fmul fast float [[TMP2]], [[TMP1]]
; CHECK-NEXT: ret float
%t0 = fmul fast float %a, %b
diff --git a/test/Transforms/Reassociate/fp-expr.ll b/test/Transforms/Reassociate/fp-expr.ll
new file mode 100644
index 000000000000..5af3b1991c9e
--- /dev/null
+++ b/test/Transforms/Reassociate/fp-expr.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -reassociate < %s | FileCheck %s
+
+define void @test1() {
+; CHECK-LABEL: @test1
+; CHECK: call
+; CHECK: fsub
+; CHECK: fadd
+ %tmp = tail call <4 x float> @blam()
+ %tmp23 = fsub fast <4 x float> undef, %tmp
+ %tmp24 = fadd fast <4 x float> %tmp23, undef
+ tail call void @wombat(<4 x float> %tmp24)
+ ret void
+}
+
+define half @test2() {
+; CHECK-LABEL: @test2
+; CHECK: fsub
+; CHECK: fsub
+; CHECK: fadd
+ %tmp15 = fsub fast half undef, undef
+ %tmp17 = fsub fast half undef, %tmp15
+ %tmp18 = fadd fast half undef, %tmp17
+ ret half %tmp18
+}
+
+
+
+; Function Attrs: optsize
+declare <4 x float> @blam()
+
+; Function Attrs: optsize
+declare void @wombat(<4 x float>)
+
diff --git a/test/Transforms/Reassociate/multistep.ll b/test/Transforms/Reassociate/multistep.ll
index c499646a8b6a..5685bb949537 100644
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@@ -8,9 +8,9 @@ define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
%t2 = mul i64 %a, %c
%t3 = mul i64 %a, %t2 ; a*(a*c)
%t4 = add i64 %t1, %t3
-; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %a, %tmp{{.*}}
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: [[TMP1:%tmp.*]] = add i64 %c, %b
+; CHECK-NEXT: [[TMP2:%tmp.*]] = mul i64 %a, %a
+; CHECK-NEXT: mul i64 [[TMP2]], [[TMP1]]
; CHECK-NEXT: ret
ret i64 %t4
}
diff --git a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
new file mode 100644
index 000000000000..c2cdffce61e4
--- /dev/null
+++ b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+; CHECK-LABEL: faddsubAssoc1
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
+; CHECK: fsub fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is A op (B op C)
+define half @faddsubAssoc1(half %a, half %b) {
+ %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+ %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+ %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+ %tmp4 = fsub fast half %tmp2, %tmp1 ; 5 * a - 3 * b
+ %tmp5 = fsub fast half %tmp3, %tmp4 ; 2 * b - ( 5 * a - 3 * b)
+ ret half %tmp5 ; = 5 * (b - a)
+}
+
+; CHECK-LABEL: faddsubAssoc2
+; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
+; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH3C00
+; CHECK: fadd fast half [[TMP2]], [[TMP1]]
+; CHECK: ret
+; Input is (A op B) op C
+define half @faddsubAssoc2(half %a, half %b) {
+ %tmp1 = fmul fast half %b, 0xH4200 ; 3*b
+ %tmp2 = fmul fast half %a, 0xH4500 ; 5*a
+ %tmp3 = fmul fast half %b, 0xH4000 ; 2*b
+ %tmp4 = fadd fast half %tmp2, %tmp1 ; 5 * a + 3 * b
+ %tmp5 = fsub fast half %tmp4, %tmp3 ; (5 * a + 3 * b) - (2 * b)
+ ret half %tmp5 ; = 5 * a + b
+}
+
diff --git a/test/Transforms/Reassociate/secondary.ll b/test/Transforms/Reassociate/secondary.ll
index a52000ada537..388cd6bcb6fe 100644
--- a/test/Transforms/Reassociate/secondary.ll
+++ b/test/Transforms/Reassociate/secondary.ll
@@ -6,7 +6,7 @@
; CHECK: define
; CHECK-NOT: undef
-; CHECK: %factor = mul i32 %tmp3, -2
+; CHECK: %factor = mul i32 %tmp3.neg, 2
; CHECK-NOT: undef
; CHECK: }
diff --git a/test/Transforms/Reassociate/vaarg_movable.ll b/test/Transforms/Reassociate/vaarg_movable.ll
new file mode 100644
index 000000000000..be4fe121fae9
--- /dev/null
+++ b/test/Transforms/Reassociate/vaarg_movable.ll
@@ -0,0 +1,28 @@
+; RUN: opt -S -reassociate -die < %s | FileCheck %s
+
+; The two va_arg instructions depend on the memory/context, are therefore not
+; identical and the sub should not be optimized to 0 by reassociate.
+;
+; CHECK-LABEL: @func(
+; ...
+; CHECK: %v0 = va_arg i8** %varargs, i32
+; CHECK: %v1 = va_arg i8** %varargs, i32
+; CHECK: %v0.neg = sub i32 0, %v0
+; CHECK: %sub = add i32 %v0.neg, 1
+; CHECK: %add = add i32 %sub, %v1
+; ...
+; CHECK: ret i32 %add
+define i32 @func(i32 %dummy, ...) {
+ %varargs = alloca i8*, align 8
+ %varargs1 = bitcast i8** %varargs to i8*
+ call void @llvm.va_start(i8* %varargs1)
+ %v0 = va_arg i8** %varargs, i32
+ %v1 = va_arg i8** %varargs, i32
+ %sub = sub nsw i32 %v1, %v0
+ %add = add nsw i32 %sub, 1
+ call void @llvm.va_end(i8* %varargs1)
+ ret i32 %add
+}
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
index a22689805fb5..0bed6f358808 100644
--- a/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) {
%xor1 = xor i32 %xor, %and
ret i32 %xor1
; CHECK-LABEL: @xor_special2(
-; CHECK: %xor = xor i32 %y, 123
-; CHECK: %xor1 = xor i32 %xor, %x
+; CHECK: %xor = xor i32 %x, 123
+; CHECK: %xor1 = xor i32 %xor, %y
; CHECK: ret i32 %xor1
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll
index 1ff1abedd31c..3fd7fd9282f1 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll
@@ -1,6 +1,6 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %merged_value base %base_phi
+; CHECK: derived %merged_value base %merged_value.base
declare void @site_for_call_safpeoint()
@@ -18,11 +18,11 @@ there:
merge:
; CHECK-LABEL: merge:
-; CHECK: %base_phi = phi i64 addrspace(1)* [ %base_obj_x, %here ], [ %base_obj_y, %there ]
+; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %here ], [ %base_obj_y, %there ]
%merged_value = phi i64 addrspace(1)* [ %x, %here ], [ %y, %there ]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
declare void @foo()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll
index f4292a998485..19f1423eea03 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll
@@ -1,8 +1,9 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %next_x base %base_obj_x
-; CHECK: derived %next_y base %base_obj_y
-; CHECK: derived %next base %base_phi
+; CHECK: Base Pairs (w/o Relocation):
+; CHECK-DAG: derived %next base %next.base
+; CHECK-DAG: derived %next_x base %base_obj_x
+; CHECK-DAG: derived %next_y base %base_obj_y
declare i1 @runtime_value()
declare void @do_safepoint()
@@ -30,8 +31,8 @@ false:
merge:
%next = phi i64 addrspace(1)* [ %next_x, %true ], [ %next_y, %false ]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll
index 2a7a7444adec..a28c925f7828 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll
@@ -19,8 +19,8 @@ loop: ; preds = %loop, %entry
; CHECK-DAG: [ %next.relocated.casted, %loop ]
%current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
%next = getelementptr i64, i64 addrspace(1)* %current, i32 1
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll
new file mode 100644
index 000000000000..5ebff642347d
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll
@@ -0,0 +1,20 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %select base @global
+
+@global = external addrspace(1) global i8
+
+define i8 @test(i1 %cond) gc "statepoint-example" {
+ %derived1 = getelementptr i8, i8 addrspace(1)* @global, i64 1
+ %derived2 = getelementptr i8, i8 addrspace(1)* @global, i64 2
+ %select = select i1 %cond, i8 addrspace(1)* %derived1, i8 addrspace(1)* %derived2
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @extern, i32 0, i32 0, i32 0, i32 0)
+; CHECK-NOT: relocate
+; CHECK: %load = load i8, i8 addrspace(1)* %select
+ %load = load i8, i8 addrspace(1)* %select
+ ret i8 %load
+}
+
+declare void @extern() gc "statepoint-example"
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll
new file mode 100644
index 000000000000..8e43e638f989
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll
@@ -0,0 +1,19 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %derived base @global
+
+@global = external addrspace(1) global i8
+
+define i8 @test(i64 %offset) gc "statepoint-example" {
+ %derived = getelementptr i8, i8 addrspace(1)* @global, i64 %offset
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @extern, i32 0, i32 0, i32 0, i32 0)
+; CHECK-NOT: relocate
+; CHECK-NOT: remat
+; CHECK: %load = load i8, i8 addrspace(1)* %derived
+ %load = load i8, i8 addrspace(1)* %derived
+ ret i8 %load
+}
+
+declare void @extern() gc "statepoint-example"
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll
index c2877d8b5393..802ce5d79a33 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll
@@ -13,9 +13,9 @@ there:
merge:
%merged_value = phi i64 addrspace(1)* [ %base_obj, %entry ], [ %derived_obj, %there ]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
declare void @foo()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll
index f72201b5138e..e0035d353887 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll
@@ -12,9 +12,9 @@ loop:
%current.i32 = bitcast i64 addrspace(1)* %current to i32 addrspace(1)*
%next.i32 = getelementptr i32, i32 addrspace(1)* %current.i32, i32 1
%next.i64 = bitcast i32 addrspace(1)* %next.i32 to i64 addrspace(1)*
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll
index 90d91d21d7b4..4e0bb14cb453 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll
@@ -1,7 +1,7 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %obj_to_consume base %base_phi
+; CHECK: derived %obj_to_consume base %obj_to_consume
declare void @foo()
declare i64 addrspace(1)* @generate_obj()
@@ -13,10 +13,10 @@ entry:
loop:
; CHECK: loop:
-; CHECK: %safepoint_token1 = call i32 (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i3
+; CHECK: %safepoint_token1 = call token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i3
; CHECK-NEXT: %obj2 = call i64 addrspace(1)* @llvm.experimental.gc.result
- %safepoint_token1 = call i32 (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- %obj2 = call i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32 %safepoint_token1)
+ %safepoint_token1 = call token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %obj2 = call i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(token %safepoint_token1)
switch i32 %condition, label %dest_a [
i32 0, label %dest_b
i32 1, label %dest_c
@@ -33,21 +33,20 @@ dest_c:
merge:
; CHECK: merge:
-; CHECK: %base_phi = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
; CHECK: %obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
%obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
- %safepoint_token3 = call i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @consume_obj, i32 1, i32 0, i64 addrspace(1)* %obj_to_consume, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token3 = call token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @consume_obj, i32 1, i32 0, i64 addrspace(1)* %obj_to_consume, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %merge.split
merge.split: ; preds = %merge
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
; Function Attrs: nounwind
-declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32) #0
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_p1i64f(i64, i32, i64 addrspace(1)* ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) \ No newline at end of file
+declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(token) #0
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_p1i64f(i64, i32, i64 addrspace(1)* ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll
index 9b73377e31cb..c5acd2962f9e 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll
@@ -1,6 +1,6 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %merged_value base %base_phi
+; CHECK: derived %merged_value base %merged_value.base
declare void @foo()
@@ -20,11 +20,11 @@ there:
merge:
; CHECK: merge:
-; CHECK: %base_phi = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %base_obj_y, %there ]
+; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %base_obj_y, %there ]
; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
%merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll
index 0b785d72d63c..95a42846a2fe 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll
@@ -1,6 +1,6 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %merged_value base %base_phi
+; CHECK: derived %merged_value base %merged_value.base
declare void @site_for_call_safpeoint()
@@ -30,12 +30,12 @@ there:
merge:
; CHECK: merge:
-; CHECK: %base_phi = phi i64 addrspace(1)* [ %base_obj_x, %merge_here ], [ %base_obj_y, %there ]
+; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %merge_here ], [ %base_obj_y, %there ]
; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
%merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll
index 467429b4d27d..49cf20eab191 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll
@@ -1,6 +1,6 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %merged_value base %base_phi
+; CHECK: derived %merged_value base %merged_value.base
declare void @site_for_call_safpeoint()
@@ -24,7 +24,7 @@ bump_here_b:
merge_here:
; CHECK: merge_here:
-; CHECK-DAG: %base_phi
+; CHECK-DAG: %x.base
; CHECK-DAG: phi i64 addrspace(1)*
; CHECK-DAG: [ %base_obj_x, %bump_here_a ]
; CHECK-DAG: [ %base_obj_y, %bump_here_b ]
@@ -37,16 +37,16 @@ there:
merge:
; CHECK: merge:
-; CHECK-DAG: %base_phi1
+; CHECK-DAG: %merged_value.base
; CHECK-DAG: phi i64 addrspace(1)*
; CHECK-DAG: %merge_here
; CHECK-DAG: [ %base_obj_y, %there ]
; CHECK: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
%merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %merged_value
}
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll
index b89be7daa67b..e5ef42dda24b 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll
@@ -24,7 +24,7 @@ check_for_null:
loop_back:
%next_element_ptr = getelementptr i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %current_element_ptr, i32 1
%next_index = add i32 %index, 1
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop_check
not_found:
@@ -35,4 +35,4 @@ found:
}
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll
index 848633b2a275..946d89a08e27 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll
@@ -13,9 +13,9 @@ loop:
%condition = call i1 @runtime_value()
%maybe_next = getelementptr i64, i64 addrspace(1)* %current, i32 1
%next = select i1 %condition, i64 addrspace(1)* %maybe_next, i64 addrspace(1)* %current
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers.ll
index 879f3f9e24da..cd0473a67678 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers.ll
@@ -16,7 +16,7 @@ loop:
; CHECK-DAG: [ %obj.relocated.casted, %loop ]
; CHECK-DAG: [ %obj, %entry ]
call void @use_obj(i64 addrspace(1)* %obj)
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
@@ -55,9 +55,9 @@ define i64 addrspace(1)* @test1(i32 %caller, i8 addrspace(1)* %a, i8 addrspace(1
merge:
; CHECK: merge:
-; CHECK-NEXT: %base_phi = phi i64 addrspace(1)* [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_R]], %right ], !is_base_value !0
+; CHECK-NEXT: %value.base = phi i64 addrspace(1)* [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_R]], %right ], !is_base_value !0
%value = phi i64 addrspace(1)* [ %a.cast, %left], [ %a.cast, %left], [ %a.cast, %left], [ %b.cast, %right]
- %safepoint_token = call i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @parse_point, i32 1, i32 0, i64 addrspace(1)* %value, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @parse_point, i32 1, i32 0, i64 addrspace(1)* %value, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %value
}
@@ -74,16 +74,15 @@ entry:
loop: ; preds = %loop, %entry
; CHECK-LABEL: loop
-; CHECK: %base_phi = phi i64 addrspace(1)*
+; CHECK: %current.base = phi i64 addrspace(1)*
; CHECK-DAG: [ %base_obj, %entry ]
; Given the two selects are equivelent, so are their base phis - ideally,
; we'd have commoned these, but that's a missed optimization, not correctness.
-; CHECK-DAG: [ [[DISCARD:%base_select.*.relocated.casted]], %loop ]
-; CHECK-NOT: base_phi2
+; CHECK-DAG: [ [[DISCARD:%.*.base.relocated.casted]], %loop ]
+; CHECK-NOT: extra.base
; CHECK: next = select
-; CHECK: base_select
+; CHECK: extra2.base = select
; CHECK: extra2 = select
-; CHECK: base_select
; CHECK: statepoint
;; Both 'next' and 'extra2' are live across the backedge safepoint...
%current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
@@ -91,10 +90,62 @@ loop: ; preds = %loop, %entry
%nexta = getelementptr i64, i64 addrspace(1)* %current, i32 1
%next = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
%extra2 = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %loop
}
+define i64 addrspace(1)* @test3(i1 %cnd, i64 addrspace(1)* %obj,
+ i64 addrspace(1)* %obj2)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test3
+entry:
+ br i1 %cnd, label %merge, label %taken
+taken:
+ br label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: gc.statepoint
+ %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %taken ]
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %bdv
+}
+
+define i64 addrspace(1)* @test4(i1 %cnd, i64 addrspace(1)* %obj,
+ i64 addrspace(1)* %obj2)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test4
+entry:
+ br i1 %cnd, label %merge, label %taken
+taken:
+ br label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: gc.statepoint
+ %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj, %taken ]
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %bdv
+}
+
+define i64 addrspace(1)* @test5(i1 %cnd, i64 addrspace(1)* %obj,
+ i64 addrspace(1)* %obj2)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test5
+entry:
+ br label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: br i1
+ %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %merge ]
+ br i1 %cnd, label %merge, label %next
+next:
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %bdv
+}
+
+
declare void @foo()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-vector.ll b/test/Transforms/RewriteStatepointsForGC/base-vector.ll
new file mode 100644
index 000000000000..6084efeb0509
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/base-vector.ll
@@ -0,0 +1,167 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -S | FileCheck %s
+
+define i64 addrspace(1)* @test(<2 x i64 addrspace(1)*> %vec, i32 %idx) gc "statepoint-example" {
+; CHECK-LABEL: @test
+; CHECK: extractelement
+; CHECK: extractelement
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%base_ee, %base_ee)
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%base_ee, %obj)
+; Note that the second extractelement is actually redundant here. A correct output would
+; be to reuse the existing obj as a base since it is actually a base pointer.
+entry:
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+
+ ret i64 addrspace(1)* %obj
+}
+
+define i64 addrspace(1)* @test2(<2 x i64 addrspace(1)*>* %ptr, i1 %cnd, i32 %idx1, i32 %idx2)
+ gc "statepoint-example" {
+; CHECK-LABEL: test2
+entry:
+ br i1 %cnd, label %taken, label %untaken
+taken:
+ %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+untaken:
+ %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+merge:
+ %vec = phi <2 x i64 addrspace(1)*> [%obja, %taken], [%objb, %untaken]
+ br i1 %cnd, label %taken2, label %untaken2
+taken2:
+ %obj0 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx1
+ br label %merge2
+untaken2:
+ %obj1 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx2
+ br label %merge2
+merge2:
+; CHECK-LABEL: merge2:
+; CHECK-NEXT: %obj = phi i64 addrspace(1)*
+; CHECK-NEXT: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%obj, %obj)
+ %obj = phi i64 addrspace(1)* [%obj0, %taken2], [%obj1, %untaken2]
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %obj
+}
+
+define i64 addrspace(1)* @test3(i64 addrspace(1)* %ptr)
+ gc "statepoint-example" {
+; CHECK-LABEL: test3
+entry:
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %ptr, i32 0
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+; CHECK: insertelement
+; CHECK: extractelement
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: (%obj, %obj)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %obj
+}
+define i64 addrspace(1)* @test4(i64 addrspace(1)* %ptr)
+ gc "statepoint-example" {
+; CHECK-LABEL: test4
+entry:
+ %derived = getelementptr i64, i64 addrspace(1)* %ptr, i64 16
+ %veca = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %derived, i32 0
+ %vec = insertelement <2 x i64 addrspace(1)*> %veca, i64 addrspace(1)* %ptr, i32 1
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%ptr, %obj)
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%ptr, %ptr)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %obj
+}
+
+declare void @use(i64 addrspace(1)*)
+
+; When we can optimize an extractelement from a known
+; index and avoid introducing new base pointer instructions
+define void @test5(i1 %cnd, i64 addrspace(1)* %obj)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test5
+; CHECK: gc.relocate
+; CHECK-DAG: (%obj, %bdv)
+entry:
+ %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+ %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @use(i64 addrspace(1)* %bdv)
+ ret void
+}
+
+; When we fundamentally have to duplicate
+define void @test6(i1 %cnd, i64 addrspace(1)* %obj, i64 %idx)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test6
+; CHECK: %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
+; CHECK: %vec.base = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj, i32 0, !is_base_value !0
+; CHECK: %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+; CHECK: %bdv.base = extractelement <2 x i64 addrspace(1)*> %vec.base, i64 %idx, !is_base_value !0
+; CHECK: %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: (%bdv.base, %bdv)
+entry:
+ %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+ %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @use(i64 addrspace(1)* %bdv)
+ ret void
+}
+
+; A more complicated example involving vector and scalar bases.
+; This is derived from a failing test case when we didn't have correct
+; insertelement handling.
+define i64 addrspace(1)* @test7(i1 %cnd, i64 addrspace(1)* %obj,
+ i64 addrspace(1)* %obj2)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test7
+entry:
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj2, i32 0
+ br label %merge1
+merge1:
+; CHECK-LABEL: merge1:
+; CHECK: vec2.base
+; CHECK: vec2
+; CHECK: gep
+; CHECK: vec3.base
+; CHECK: vec3
+ %vec2 = phi <2 x i64 addrspace(1)*> [ %vec, %entry ], [ %vec3, %merge1 ]
+ %gep = getelementptr i64, i64 addrspace(1)* %obj2, i64 1
+ %vec3 = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+ br i1 %cnd, label %merge1, label %next1
+next1:
+; CHECK-LABEL: next1:
+; CHECK: bdv.base =
+; CHECK: bdv =
+ %bdv = extractelement <2 x i64 addrspace(1)*> %vec2, i32 0
+ br label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK: %objb.base
+; CHECK: %objb
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: (%objb.base, %objb)
+
+ %objb = phi i64 addrspace(1)* [ %obj, %next1 ], [ %bdv, %merge ]
+ br i1 %cnd, label %merge, label %next
+next:
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i64 addrspace(1)* %objb
+}
+
+
+declare void @do_safepoint()
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/basics.ll b/test/Transforms/RewriteStatepointsForGC/basics.ll
index 2a61924a5927..48f464356865 100644
--- a/test/Transforms/RewriteStatepointsForGC/basics.ll
+++ b/test/Transforms/RewriteStatepointsForGC/basics.ll
@@ -10,7 +10,7 @@ define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %obj
}
@@ -23,8 +23,8 @@ define i8 addrspace(1)* @test2(i8 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated1 = call coldcc i8 addrspace(1)*
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %obj
}
@@ -39,7 +39,7 @@ define i8 @test3(i8 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-NEXT: load i8, i8 addrspace(1)* %obj.relocated
entry:
%derived = getelementptr i8, i8 addrspace(1)* %obj, i64 10
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
%a = load i8, i8 addrspace(1)* %derived
%b = load i8, i8 addrspace(1)* %obj
@@ -57,14 +57,14 @@ taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %merge
untaken:
; CHECK-LABEL: untaken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated1 = call coldcc i8 addrspace(1)*
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %merge
merge:
@@ -81,8 +81,8 @@ define i8 addrspace(1)* @test5(i8 addrspace(1)* %obj) gc "ocaml" {
; CHECK-NEXT: gc.statepoint
; CHECK-NOT: %obj.relocated = call coldcc i8 addrspace(1)*
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %obj
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/codegen-cond.ll b/test/Transforms/RewriteStatepointsForGC/codegen-cond.ll
new file mode 100644
index 000000000000..8221cd0e0f82
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/codegen-cond.ll
@@ -0,0 +1,74 @@
+; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s
+
+; A null test of a single value
+define i1 @test(i8 addrspace(1)* %p, i1 %rare) gc "statepoint-example" {
+; CHECK-LABEL: @test
+entry:
+ %cond = icmp eq i8 addrspace(1)* %p, null
+ br i1 %rare, label %safepoint, label %continue, !prof !0
+safepoint:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @safepoint, i32 0, i32 0, i32 0, i32 0)
+ br label %continue
+continue:
+; CHECK-LABEL: continue:
+; CHECK: phi
+; CHECK-DAG: [ %p.relocated, %safepoint ]
+; CHECK-DAG: [ %p, %entry ]
+; CHECK: %cond = icmp
+; CHECK: br i1 %cond
+ br i1 %cond, label %taken, label %untaken
+taken:
+ ret i1 true
+untaken:
+ ret i1 false
+}
+
+; Comparing two pointers
+define i1 @test2(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+ %cond = icmp eq i8 addrspace(1)* %p, %q
+ br i1 %rare, label %safepoint, label %continue, !prof !0
+safepoint:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @safepoint, i32 0, i32 0, i32 0, i32 0)
+ br label %continue
+continue:
+; CHECK-LABEL: continue:
+; CHECK: phi
+; CHECK-DAG: [ %q.relocated, %safepoint ]
+; CHECK-DAG: [ %q, %entry ]
+; CHECK: phi
+; CHECK-DAG: [ %p.relocated, %safepoint ]
+; CHECK-DAG: [ %p, %entry ]
+; CHECK: %cond = icmp
+; CHECK: br i1 %cond
+ br i1 %cond, label %taken, label %untaken
+taken:
+ ret i1 true
+untaken:
+ ret i1 false
+}
+
+; Sanity check that nothing bad happens if already last instruction
+; before terminator
+define i1 @test3(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare)
+ gc "statepoint-example" {
+; CHECK-LABEL: @test3
+entry:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @safepoint, i32 0, i32 0, i32 0, i32 0)
+; CHECK: gc.statepoint
+; CHECK: %cond = icmp
+; CHECK: br i1 %cond
+ %cond = icmp eq i8 addrspace(1)* %p, %q
+ br i1 %cond, label %taken, label %untaken
+taken:
+ ret i1 true
+untaken:
+ ret i1 false
+}
+
+declare void @safepoint()
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+
+!0 = !{!"branch_weights", i32 1, i32 10000}
diff --git a/test/Transforms/RewriteStatepointsForGC/constants.ll b/test/Transforms/RewriteStatepointsForGC/constants.ll
index a30fdd7034a4..b30f64beba09 100644
--- a/test/Transforms/RewriteStatepointsForGC/constants.ll
+++ b/test/Transforms/RewriteStatepointsForGC/constants.ll
@@ -1,7 +1,7 @@
; RUN: opt -S -rewrite-statepoints-for-gc %s | FileCheck %s
declare void @foo()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
; constants don't get relocated.
define i8 @test() gc "statepoint-example" {
@@ -9,7 +9,7 @@ define i8 @test() gc "statepoint-example" {
; CHECK: gc.statepoint
; CHECK-NEXT: load i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*)
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
%res = load i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*)
ret i8 %res
}
@@ -22,7 +22,7 @@ define i8 @test2(i8 addrspace(1)* %p) gc "statepoint-example" {
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: icmp
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
%cmp = icmp eq i8 addrspace(1)* %p, null
br i1 %cmp, label %taken, label %not_taken
@@ -52,9 +52,44 @@ define i8 @test3(i1 %always_true) gc "statepoint-example" {
; CHECK: gc.statepoint
; CHECK-NEXT: load i8, i8 addrspace(1)* @G
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
%res = load i8, i8 addrspace(1)* @G, align 1
ret i8 %res
}
+; Even for source languages without constant references, we can
+; see constants show up along paths where the value is dead.
+; This is particularly relevant when computing bases of PHIs.
+define i8 addrspace(1)* @test4(i8 addrspace(1)* %p) gc "statepoint-example" {
+; CHECK-LABEL: @test4
+entry:
+ %is_null = icmp eq i8 addrspace(1)* %p, null
+ br i1 %is_null, label %split, label %join
+
+split:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ %arg_value_addr.i = getelementptr inbounds i8, i8 addrspace(1)* %p, i64 8
+ %arg_value_addr_casted.i = bitcast i8 addrspace(1)* %arg_value_addr.i to i8 addrspace(1)* addrspace(1)*
+ br label %join
+
+join:
+; CHECK-LABEL: join
+; CHECK: %addr2.base =
+ %addr2 = phi i8 addrspace(1)* addrspace(1)* [ %arg_value_addr_casted.i, %split ], [ inttoptr (i64 8 to i8 addrspace(1)* addrspace(1)*), %entry ]
+ ;; NOTE: This particular example can be jump-threaded, but in general,
+ ;; we can't, and have to deal with the resulting IR.
+ br i1 %is_null, label %early-exit, label %use
+
+early-exit:
+ ret i8 addrspace(1)* null
+
+use:
+; CHECK-LABEL: use:
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ %res = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %addr2, align 1
+ ret i8 addrspace(1)* %res
+}
+
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll
new file mode 100644
index 000000000000..6af2a3012b5c
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll
@@ -0,0 +1,25 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %merged_value base %merged_value.base
+
+declare void @site_for_call_safpeoint()
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition) gc "statepoint-example" {
+entry:
+ br i1 %runtime_condition, label %here, label %there
+
+here: ; preds = %entry
+ %x = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 1
+ br label %merge
+
+there: ; preds = %entry
+ %y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
+ br label %merge
+
+merge: ; preds = %there, %here
+; CHECK-LABEL: merge:
+; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %here ], [ %base_obj_y, %there ]
+ %merged_value = phi i64 addrspace(1)* [ %x, %here ], [ %y, %there ]
+ call void @site_for_call_safpeoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %merged_value
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll
new file mode 100644
index 000000000000..8c486d6b3896
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll
@@ -0,0 +1,35 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+
+declare i1 @runtime_value() "gc-leaf-function"
+
+declare void @do_safepoint()
+
+define void @select_of_phi(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y) gc "statepoint-example" {
+entry:
+ br label %loop
+
+loop: ; preds = %merge, %entry
+ %current_x = phi i64 addrspace(1)* [ %base_obj_x, %entry ], [ %next_x, %merge ]
+ %current_y = phi i64 addrspace(1)* [ %base_obj_y, %entry ], [ %next_y, %merge ]
+ %current = phi i64 addrspace(1)* [ null, %entry ], [ %next, %merge ]
+ %condition = call i1 @runtime_value()
+ %next_x = getelementptr i64, i64 addrspace(1)* %current_x, i32 1
+ %next_y = getelementptr i64, i64 addrspace(1)* %current_y, i32 1
+ br i1 %condition, label %true, label %false
+
+true: ; preds = %loop
+ br label %merge
+
+false: ; preds = %loop
+ br label %merge
+
+merge: ; preds = %false, %true
+ %next = phi i64 addrspace(1)* [ %next_x, %true ], [ %next_y, %false ]
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
+; CHECK: Base Pairs (w/o Relocation):
+; CHECK-DAG: derived %next base %next.base
+; CHECK-DAG: derived %next_x base %base_obj_x
+; CHECK-DAG: derived %next_y base %base_obj_y
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll
new file mode 100644
index 000000000000..ae793b2cb630
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll
@@ -0,0 +1,24 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %next base %base_obj
+
+declare void @do_safepoint()
+
+define void @test(i64 addrspace(1)* %base_obj) gc "statepoint-example" {
+entry:
+ %obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
+ br label %loop
+
+loop: ; preds = %loop, %entry
+; CHECK-LABEL: loop:
+; CHECK: phi i64 addrspace(1)*
+; CHECK-DAG: [ %base_obj.relocated.casted, %loop ]
+; CHECK-DAG: [ %base_obj, %entry ]
+; CHECK: %current = phi i64 addrspace(1)*
+; CHECK-DAG: [ %obj, %entry ]
+; CHECK-DAG: [ %next.relocated.casted, %loop ]
+ %current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
+ %next = getelementptr i64, i64 addrspace(1)* %current, i32 1
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll
new file mode 100644
index 000000000000..2b9485388f80
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll
@@ -0,0 +1,19 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %merged_value base %base_obj
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj, i1 %runtime_condition) gc "statepoint-example" {
+entry:
+ br i1 %runtime_condition, label %merge, label %there
+
+there: ; preds = %entry
+ %derived_obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
+ br label %merge
+
+merge: ; preds = %there, %entry
+ %merged_value = phi i64 addrspace(1)* [ %base_obj, %entry ], [ %derived_obj, %there ]
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %merged_value
+}
+
+declare void @foo()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll
new file mode 100644
index 000000000000..71bb309d1301
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll
@@ -0,0 +1,19 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %next.i64 base %base_obj
+
+define void @test(i64 addrspace(1)* %base_obj) gc "statepoint-example" {
+entry:
+ %obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next.i64, %loop ]
+ %current.i32 = bitcast i64 addrspace(1)* %current to i32 addrspace(1)*
+ %next.i32 = getelementptr i32, i32 addrspace(1)* %current.i32, i32 1
+ %next.i64 = bitcast i32 addrspace(1)* %next.i32 to i64 addrspace(1)*
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll
new file mode 100644
index 000000000000..3fcbf26a6fc0
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll
@@ -0,0 +1,44 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %obj_to_consume base %obj_to_consume
+
+declare void @foo()
+
+declare i64 addrspace(1)* @generate_obj()
+
+declare void @consume_obj(i64 addrspace(1)*)
+
+define void @test(i32 %condition) gc "statepoint-example" {
+entry:
+ br label %loop
+
+loop: ; preds = %merge.split, %entry
+; CHECK: loop:
+; CHECK: [[TOKEN_0:%[^ ]+]] = call token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i3
+; CHECK-NEXT: [[RESULT_0:%[^ ]+]] = call i64 addrspace(1)* @llvm.experimental.gc.result
+ %0 = call i64 addrspace(1)* @generate_obj() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ switch i32 %condition, label %dest_a [
+ i32 0, label %dest_b
+ i32 1, label %dest_c
+ ]
+
+dest_a: ; preds = %loop
+ br label %merge
+
+dest_b: ; preds = %loop
+ br label %merge
+
+dest_c: ; preds = %loop
+ br label %merge
+
+merge: ; preds = %dest_c, %dest_b, %dest_a
+; CHECK: merge:
+; CHECK: %obj_to_consume = phi i64 addrspace(1)* [ [[RESULT_0]], %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
+ %obj_to_consume = phi i64 addrspace(1)* [ %0, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
+ call void @consume_obj(i64 addrspace(1)* %obj_to_consume) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %merge.split
+
+merge.split: ; preds = %merge
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll
new file mode 100644
index 000000000000..4d43d7f7307c
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll
@@ -0,0 +1,28 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %merged_value base %merged_value.base
+
+declare void @foo()
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition) gc "statepoint-example" {
+entry:
+ br i1 %runtime_condition, label %here, label %there
+
+here: ; preds = %entry
+ br label %bump
+
+bump: ; preds = %here
+ br label %merge
+
+there: ; preds = %entry
+ %y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
+ br label %merge
+
+merge: ; preds = %there, %bump
+; CHECK: merge:
+; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %base_obj_y, %there ]
+; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
+ %merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %merged_value
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll
new file mode 100644
index 000000000000..2d555d179c29
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll
@@ -0,0 +1,37 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %merged_value base %merged_value.base
+
+declare void @site_for_call_safpeoint()
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition_x, i1 %runtime_condition_y) gc "statepoint-example" {
+entry:
+ br i1 %runtime_condition_x, label %here, label %there
+
+here: ; preds = %entry
+ br i1 %runtime_condition_y, label %bump_here_a, label %bump_here_b
+
+bump_here_a: ; preds = %here
+ %x_a = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 1
+ br label %merge_here
+
+bump_here_b: ; preds = %here
+ %x_b = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 2
+ br label %merge_here
+
+merge_here: ; preds = %bump_here_b, %bump_here_a
+ %x = phi i64 addrspace(1)* [ %x_a, %bump_here_a ], [ %x_b, %bump_here_b ]
+ br label %merge
+
+there: ; preds = %entry
+ %y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
+ br label %merge
+
+merge: ; preds = %there, %merge_here
+; CHECK: merge:
+; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %merge_here ], [ %base_obj_y, %there ]
+; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
+ %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
+ call void @site_for_call_safpeoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %merged_value
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll
new file mode 100644
index 000000000000..e90ef63184ee
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll
@@ -0,0 +1,45 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %merged_value base %merged_value.base
+
+declare void @site_for_call_safpeoint()
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition_x, i1 %runtime_condition_y) gc "statepoint-example" {
+entry:
+ br i1 %runtime_condition_x, label %here, label %there
+
+here: ; preds = %entry
+ br i1 %runtime_condition_y, label %bump_here_a, label %bump_here_b
+
+bump_here_a: ; preds = %here
+ %x_a = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 1
+ br label %merge_here
+
+bump_here_b: ; preds = %here
+ %x_b = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 2
+ br label %merge_here
+
+merge_here: ; preds = %bump_here_b, %bump_here_a
+; CHECK: merge_here:
+; CHECK-DAG: %x.base
+; CHECK-DAG: phi i64 addrspace(1)*
+; CHECK-DAG: [ %base_obj_x, %bump_here_a ]
+; CHECK-DAG: [ %base_obj_y, %bump_here_b ]
+ %x = phi i64 addrspace(1)* [ %x_a, %bump_here_a ], [ %x_b, %bump_here_b ]
+ br label %merge
+
+there: ; preds = %entry
+ %y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
+ br label %merge
+
+merge: ; preds = %there, %merge_here
+; CHECK: merge:
+; CHECK-DAG: %merged_value.base
+; CHECK-DAG: phi i64 addrspace(1)*
+; CHECK-DAG: %merge_here
+; CHECK-DAG: [ %base_obj_y, %there ]
+; CHECK: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
+ %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
+ call void @site_for_call_safpeoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %merged_value
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll
new file mode 100644
index 000000000000..628696ba2c2f
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll
@@ -0,0 +1,37 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %next_element_ptr base %array_obj
+
+define i32 @null_in_array(i64 addrspace(1)* %array_obj) gc "statepoint-example" {
+entry:
+ %array_len_pointer.i64 = getelementptr i64, i64 addrspace(1)* %array_obj, i32 1
+ %array_len_pointer.i32 = bitcast i64 addrspace(1)* %array_len_pointer.i64 to i32 addrspace(1)*
+ %array_len = load i32, i32 addrspace(1)* %array_len_pointer.i32
+ %array_elems = bitcast i32 addrspace(1)* %array_len_pointer.i32 to i64 addrspace(1)* addrspace(1)*
+ br label %loop_check
+
+loop_check: ; preds = %loop_back, %entry
+ %index = phi i32 [ 0, %entry ], [ %next_index, %loop_back ]
+ %current_element_ptr = phi i64 addrspace(1)* addrspace(1)* [ %array_elems, %entry ], [ %next_element_ptr, %loop_back ]
+ %index_lt = icmp ult i32 %index, %array_len
+ br i1 %index_lt, label %check_for_null, label %not_found
+
+check_for_null: ; preds = %loop_check
+ %current_element = load i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %current_element_ptr
+ %is_null = icmp eq i64 addrspace(1)* %current_element, null
+ br i1 %is_null, label %found, label %loop_back
+
+loop_back: ; preds = %check_for_null
+ %next_element_ptr = getelementptr i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %current_element_ptr, i32 1
+ %next_index = add i32 %index, 1
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop_check
+
+not_found: ; preds = %loop_check
+ ret i32 -1
+
+found: ; preds = %check_for_null
+ ret i32 %index
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll
new file mode 100644
index 000000000000..a82af3b96892
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll
@@ -0,0 +1,20 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
+
+; CHECK: derived %next base %base_obj
+
+declare i1 @runtime_value() "gc-leaf-function"
+
+define void @maybe_GEP(i64 addrspace(1)* %base_obj) gc "statepoint-example" {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %current = phi i64 addrspace(1)* [ %base_obj, %entry ], [ %next, %loop ]
+ %condition = call i1 @runtime_value()
+ %maybe_next = getelementptr i64, i64 addrspace(1)* %current, i32 1
+ %next = select i1 %condition, i64 addrspace(1)* %maybe_next, i64 addrspace(1)* %current
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll
new file mode 100644
index 000000000000..a378d1502add
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll
@@ -0,0 +1,151 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S 2>&1 | FileCheck %s
+
+; The rewriting needs to make %obj loop variant by inserting a phi
+; of the original value and its relocation.
+
+declare i64 addrspace(1)* @generate_obj() "gc-leaf-function"
+
+declare void @use_obj(i64 addrspace(1)*) "gc-leaf-function"
+
+define void @def_use_safepoint() gc "statepoint-example" {
+; CHECK-LABEL: def_use_safepoint
+; CHECK: phi i64 addrspace(1)*
+; CHECK-DAG: [ %obj.relocated.casted, %loop ]
+; CHECK-DAG: [ %obj, %entry ]
+entry:
+ %obj = call i64 addrspace(1)* @generate_obj()
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ call void @use_obj(i64 addrspace(1)* %obj)
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
+
+declare void @do_safepoint()
+
+declare void @parse_point(i64 addrspace(1)*)
+
+define i64 addrspace(1)* @test1(i32 %caller, i8 addrspace(1)* %a, i8 addrspace(1)* %b, i32 %unknown) gc "statepoint-example" {
+; CHECK-LABEL: test1
+entry:
+ br i1 undef, label %left, label %right
+
+left: ; preds = %entry
+; CHECK: left:
+; CHECK-NEXT: %a.cast = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
+; CHECK-NEXT: [[CAST_L:%.*]] = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
+; Our safepoint placement pass calls removeUnreachableBlocks, which does a bunch
+; of simplifications to branch instructions. This bug is visible only when
+; there are multiple branches into the same block from the same predecessor, and
+; the following ceremony is to make that artefact survive a call to
+; removeUnreachableBlocks. As an example, "br i1 undef, label %merge, label %merge"
+; will get simplified to "br label %merge" by removeUnreachableBlocks.
+ %a.cast = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
+ switch i32 %unknown, label %right [
+ i32 0, label %merge
+ i32 1, label %merge
+ i32 5, label %merge
+ i32 3, label %right
+ ]
+
+right: ; preds = %left, %left, %entry
+; CHECK: right:
+; CHECK-NEXT: %b.cast = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
+; CHECK-NEXT: [[CAST_R:%.*]] = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
+ %b.cast = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
+ br label %merge
+
+merge: ; preds = %right, %left, %left, %left
+; CHECK: merge:
+; CHECK-NEXT: %value.base = phi i64 addrspace(1)* [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_R]], %right ], !is_base_value !0
+ %value = phi i64 addrspace(1)* [ %a.cast, %left ], [ %a.cast, %left ], [ %a.cast, %left ], [ %b.cast, %right ]
+ call void @parse_point(i64 addrspace(1)* %value) [ "deopt"(i32 0, i32 0, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %value
+}
+
+;; The purpose of this test is to ensure that when two live values share a
+;; base defining value with inherent conflicts, we end up with a *single*
+;; base phi/select per such node. This is testing an optimization, not a
+;; fundamental correctness criterion.
+define void @test2(i1 %cnd, i64 addrspace(1)* %base_obj, i64 addrspace(1)* %base_arg2) gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+ %obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
+ br label %loop
+; CHECK-LABEL: loop
+; CHECK: %current.base = phi i64 addrspace(1)*
+; CHECK-DAG: [ %base_obj, %entry ]
+
+; Given the two selects are equivalent, so are their base phis - ideally,
+; we'd have commoned these, but that's a missed optimization, not correctness.
+; CHECK-DAG: [ [[DISCARD:%.*.base.relocated.casted]], %loop ]
+; CHECK-NOT: extra.base
+; CHECK: next = select
+; CHECK: extra2.base = select
+; CHECK: extra2 = select
+; CHECK: statepoint
+;; Both 'next' and 'extra2' are live across the backedge safepoint...
+
+loop: ; preds = %loop, %entry
+ %current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
+ %extra = phi i64 addrspace(1)* [ %obj, %entry ], [ %extra2, %loop ]
+ %nexta = getelementptr i64, i64 addrspace(1)* %current, i32 1
+ %next = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
+ %extra2 = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %loop
+}
+
+define i64 addrspace(1)* @test3(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
+; CHECK-LABEL: @test3
+entry:
+ br i1 %cnd, label %merge, label %taken
+
+taken: ; preds = %entry
+ br label %merge
+
+merge: ; preds = %taken, %entry
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: gc.statepoint
+ %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %taken ]
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %bdv
+}
+
+define i64 addrspace(1)* @test4(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
+; CHECK-LABEL: @test4
+entry:
+ br i1 %cnd, label %merge, label %taken
+
+taken: ; preds = %entry
+ br label %merge
+
+merge: ; preds = %taken, %entry
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: gc.statepoint
+ %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj, %taken ]
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %bdv
+}
+
+define i64 addrspace(1)* @test5(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
+; CHECK-LABEL: @test5
+entry:
+ br label %merge
+
+merge: ; preds = %merge, %entry
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: br i1
+ %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %merge ]
+ br i1 %cnd, label %merge, label %next
+
+next: ; preds = %merge
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %bdv
+}
+
+declare void @foo()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll
new file mode 100644
index 000000000000..96b7390b77bc
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll
@@ -0,0 +1,167 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S | FileCheck %s
+
+
+define i64 addrspace(1)* @test(<2 x i64 addrspace(1)*> %vec, i32 %idx) gc "statepoint-example" {
+; CHECK-LABEL: @test
+; CHECK: extractelement
+; CHECK: extractelement
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%base_ee, %base_ee)
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%base_ee, %obj)
+; Note that the second extractelement is actually redundant here. A correct output would
+; be to reuse the existing obj as a base since it is actually a base pointer.
+entry:
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx
+ call void @do_safepoint() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+}
+
+define i64 addrspace(1)* @test2(<2 x i64 addrspace(1)*>* %ptr, i1 %cnd, i32 %idx1, i32 %idx2) gc "statepoint-example" {
+; CHECK-LABEL: test2
+entry:
+ br i1 %cnd, label %taken, label %untaken
+
+taken: ; preds = %entry
+ %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+
+untaken: ; preds = %entry
+ %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+ %vec = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
+ br i1 %cnd, label %taken2, label %untaken2
+
+taken2: ; preds = %merge
+ %obj0 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx1
+ br label %merge2
+
+untaken2: ; preds = %merge
+ %obj1 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx2
+ br label %merge2
+
+merge2: ; preds = %untaken2, %taken2
+; CHECK-LABEL: merge2:
+; CHECK-NEXT: %obj = phi i64 addrspace(1)*
+; CHECK-NEXT: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%obj, %obj)
+ %obj = phi i64 addrspace(1)* [ %obj0, %taken2 ], [ %obj1, %untaken2 ]
+ call void @do_safepoint() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+}
+
+define i64 addrspace(1)* @test3(i64 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test3
+; CHECK: insertelement
+; CHECK: extractelement
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: (%obj, %obj)
+entry:
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %ptr, i32 0
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+ call void @do_safepoint() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+}
+
+define i64 addrspace(1)* @test4(i64 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test4
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%ptr, %obj)
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%ptr, %ptr)
+; When we can optimize an extractelement from a known
+; index and avoid introducing new base pointer instructions
+entry:
+ %derived = getelementptr i64, i64 addrspace(1)* %ptr, i64 16
+ %veca = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %derived, i32 0
+ %vec = insertelement <2 x i64 addrspace(1)*> %veca, i64 addrspace(1)* %ptr, i32 1
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+ call void @do_safepoint() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+}
+
+declare void @use(i64 addrspace(1)*) "gc-leaf-function"
+
+define void @test5(i1 %cnd, i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test5
+; CHECK: gc.relocate
+; CHECK-DAG: (%obj, %bdv)
+; When we fundamentally have to duplicate
+entry:
+ %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+ %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ call void @use(i64 addrspace(1)* %bdv)
+ ret void
+}
+
+define void @test6(i1 %cnd, i64 addrspace(1)* %obj, i64 %idx) gc "statepoint-example" {
+; CHECK-LABEL: @test6
+; CHECK: %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
+; CHECK: %vec.base = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj, i32 0, !is_base_value !0
+; CHECK: %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+; CHECK: %bdv.base = extractelement <2 x i64 addrspace(1)*> %vec.base, i64 %idx, !is_base_value !0
+; CHECK: %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: (%bdv.base, %bdv)
+; A more complicated example involving vector and scalar bases.
+; This is derived from a failing test case when we didn't have correct
+; insertelement handling.
+entry:
+ %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+ %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ call void @use(i64 addrspace(1)* %bdv)
+ ret void
+}
+
+define i64 addrspace(1)* @test7(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
+; CHECK-LABEL: @test7
+entry:
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj2, i32 0
+ br label %merge1
+
+merge1: ; preds = %merge1, %entry
+; CHECK-LABEL: merge1:
+; CHECK: vec2.base
+; CHECK: vec2
+; CHECK: gep
+; CHECK: vec3.base
+; CHECK: vec3
+ %vec2 = phi <2 x i64 addrspace(1)*> [ %vec, %entry ], [ %vec3, %merge1 ]
+ %gep = getelementptr i64, i64 addrspace(1)* %obj2, i64 1
+ %vec3 = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
+ br i1 %cnd, label %merge1, label %next1
+
+next1: ; preds = %merge1
+; CHECK-LABEL: next1:
+; CHECK: bdv.base =
+; CHECK: bdv =
+ %bdv = extractelement <2 x i64 addrspace(1)*> %vec2, i32 0
+ br label %merge
+
+merge: ; preds = %merge, %next1
+; CHECK-LABEL: merge:
+; CHECK: %objb.base
+; CHECK: %objb
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: (%objb.base, %objb)
+ %objb = phi i64 addrspace(1)* [ %obj, %next1 ], [ %bdv, %merge ]
+ br i1 %cnd, label %merge, label %next
+
+next: ; preds = %merge
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64 addrspace(1)* %objb
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll
new file mode 100644
index 000000000000..c0dc6940e5db
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles < %s | FileCheck %s
+
+declare void @g()
+declare i32 @h()
+
+define i32 addrspace(1)* @f0(i32 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: @f0(
+ entry:
+; CHECK: [[TOKEN_0:%[^ ]+]] = call token {{[^@]*}} @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @g, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* %arg)
+ call void @g() [ "deopt"(i32 100) ]
+
+; CHECK: %arg.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[TOKEN_0]], i32 8, i32 8)
+ ret i32 addrspace(1)* %arg
+}
+
+define i32 addrspace(1)* @f1(i32 addrspace(1)* %arg) gc "statepoint-example" personality i32 8 {
+; CHECK-LABEL: @f1(
+ entry:
+; CHECK: [[TOKEN_1:%[^ ]+]] = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @g, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* %arg)
+ invoke void @g() [ "deopt"(i32 100) ] to label %normal_dest unwind label %unwind_dest
+
+ normal_dest:
+; CHECK: %arg.relocated1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[TOKEN_1]], i32 8, i32 8)
+ ret i32 addrspace(1)* %arg
+
+ unwind_dest:
+ %lpad = landingpad token cleanup
+ resume token undef
+}
+
+define i32 addrspace(1)* @f2(i32 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: @f2(
+ entry:
+; CHECK: [[TOKEN_2:%[^ ]+]] = call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @h, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* %arg)
+ %val = call i32 @h() [ "deopt"(i32 100) ]
+
+; CHECK: [[RESULT_F2:%[^ ]+]] = call i32 @llvm.experimental.gc.result.i32(token [[TOKEN_2]])
+; CHECK: %arg.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[TOKEN_2]], i32 8, i32 8)
+; CHECK: %arg.relocated.casted = bitcast i8 addrspace(1)* %arg.relocated to i32 addrspace(1)*
+
+ store i32 %val, i32 addrspace(1)* %arg
+; CHECK: store i32 [[RESULT_F2]], i32 addrspace(1)* %arg.relocated.casted
+ ret i32 addrspace(1)* %arg
+}
+
+define i32 addrspace(1)* @f3(i32 addrspace(1)* %arg) gc "statepoint-example" personality i32 8 {
+; CHECK-LABEL: @f3(
+ entry:
+; CHECK: [[TOKEN_3:%[^ ]+]] = invoke token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @h, i32 0, i32 0, i32 0, i32 1, i32 100, i32 addrspace(1)* %arg)
+ %val = invoke i32 @h() [ "deopt"(i32 100) ] to label %normal_dest unwind label %unwind_dest
+
+ normal_dest:
+; CHECK: [[RESULT_F3:%[^ ]+]] = call i32 @llvm.experimental.gc.result.i32(token [[TOKEN_3]])
+; CHECK: [[ARG_RELOCATED:%[^ ]+]] = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token [[TOKEN_3]], i32 8, i32 8)
+; CHECK: [[ARG_RELOCATED_CASTED:%[^ ]+]] = bitcast i8 addrspace(1)* [[ARG_RELOCATED]] to i32 addrspace(1)*
+
+ store i32 %val, i32 addrspace(1)* %arg
+
+; CHECK: store i32 [[RESULT_F3]], i32 addrspace(1)* [[ARG_RELOCATED_CASTED]]
+ ret i32 addrspace(1)* %arg
+
+ unwind_dest:
+ %lpad = landingpad token cleanup
+ resume token undef
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll
new file mode 100644
index 000000000000..48c45eaa1b01
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll
@@ -0,0 +1,88 @@
+; This is a collection of really basic tests for gc.statepoint rewriting.
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-rematerialization-threshold=0 -S | FileCheck %s
+
+; Trivial relocation over a single call
+
+declare void @foo()
+
+define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test1
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; Next test (@test2): two safepoints in a row (i.e. consistent liveness)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %obj
+}
+
+define i8 addrspace(1)* @test2(i8 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated2 = call coldcc i8 addrspace(1)*
+; Next test (@test3): a simple derived pointer
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %obj
+}
+
+define i8 @test3(i8 addrspace(1)* %obj) gc "statepoint-example" {
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %derived.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: load i8, i8 addrspace(1)* %derived.relocated
+; CHECK-NEXT: load i8, i8 addrspace(1)* %obj.relocated
+; Next test (@test4): make sure we visit both the taken and untaken predecessor
+; of merge. This was a bug in the dataflow liveness at one point.
+ %derived = getelementptr i8, i8 addrspace(1)* %obj, i64 10
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ %a = load i8, i8 addrspace(1)* %derived
+ %b = load i8, i8 addrspace(1)* %obj
+ %c = sub i8 %a, %b
+ ret i8 %c
+}
+
+define i8 addrspace(1)* @test4(i1 %cmp, i8 addrspace(1)* %obj) gc "statepoint-example" {
+entry:
+ br i1 %cmp, label %taken, label %untaken
+
+taken: ; preds = %entry
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %merge
+
+untaken: ; preds = %entry
+; CHECK-LABEL: untaken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated2 = call coldcc i8 addrspace(1)*
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %.0 = phi i8 addrspace(1)* [ %obj.relocated, %taken ], [ %obj.relocated2, %untaken ]
+; CHECK-NEXT: ret i8 addrspace(1)* %.0
+; Next test (@test5): when run over a function which doesn't opt in, should do nothing!
+ ret i8 addrspace(1)* %obj
+}
+
+define i8 addrspace(1)* @test5(i8 addrspace(1)* %obj) gc "ocaml" {
+; CHECK-LABEL: @test5
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NOT: %obj.relocated = call coldcc i8 addrspace(1)*
+ %0 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %obj
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll
new file mode 100644
index 000000000000..f0da0c06db0a
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll
@@ -0,0 +1,81 @@
+; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S < %s | FileCheck %s
+
+; A null test of a single value
+
+define i1 @test(i8 addrspace(1)* %p, i1 %rare) gc "statepoint-example" {
+; CHECK-LABEL: @test
+entry:
+ %cond = icmp eq i8 addrspace(1)* %p, null
+ br i1 %rare, label %safepoint, label %continue, !prof !0
+
+safepoint: ; preds = %entry
+ call void @safepoint() [ "deopt"() ]
+ br label %continue
+
+continue: ; preds = %safepoint, %entry
+; CHECK-LABEL: continue:
+; CHECK: phi
+; CHECK-DAG: [ %p.relocated, %safepoint ]
+; CHECK-DAG: [ %p, %entry ]
+; CHECK: %cond = icmp
+; CHECK: br i1 %cond
+; Next test (@test2): comparing two pointers
+ br i1 %cond, label %taken, label %untaken
+
+taken: ; preds = %continue
+ ret i1 true
+
+untaken: ; preds = %continue
+ ret i1 false
+}
+
+define i1 @test2(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare) gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+ %cond = icmp eq i8 addrspace(1)* %p, %q
+ br i1 %rare, label %safepoint, label %continue, !prof !0
+
+safepoint: ; preds = %entry
+ call void @safepoint() [ "deopt"() ]
+ br label %continue
+
+continue: ; preds = %safepoint, %entry
+; CHECK-LABEL: continue:
+; CHECK: phi
+; CHECK-DAG: [ %q.relocated, %safepoint ]
+; CHECK-DAG: [ %q, %entry ]
+; CHECK: phi
+; CHECK-DAG: [ %p.relocated, %safepoint ]
+; CHECK-DAG: [ %p, %entry ]
+; CHECK: %cond = icmp
+; CHECK: br i1 %cond
+; Next test (@test3): sanity check that nothing bad happens if the compare is
+; already the last instruction before the terminator
+ br i1 %cond, label %taken, label %untaken
+
+taken: ; preds = %continue
+ ret i1 true
+
+untaken: ; preds = %continue
+ ret i1 false
+}
+
+define i1 @test3(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare) gc "statepoint-example" {
+; CHECK-LABEL: @test3
+; CHECK: gc.statepoint
+; CHECK: %cond = icmp
+; CHECK: br i1 %cond
+entry:
+ call void @safepoint() [ "deopt"() ]
+ %cond = icmp eq i8 addrspace(1)* %p, %q
+ br i1 %cond, label %taken, label %untaken
+
+taken: ; preds = %entry
+ ret i1 true
+
+untaken: ; preds = %entry
+ ret i1 false
+}
+
+declare void @safepoint()
+!0 = !{!"branch_weights", i32 1, i32 10000}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll
new file mode 100644
index 000000000000..eede1b09d161
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll
@@ -0,0 +1,51 @@
+; RUN: opt -S -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles %s | FileCheck %s
+
+; constants don't get relocated.
+@G = addrspace(1) global i8 5
+
+declare void @foo()
+
+define i8 @test() gc "statepoint-example" {
+; CHECK-LABEL: @test
+; CHECK: gc.statepoint
+; CHECK-NEXT: load i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*)
+; Mostly just here to show reasonable code test can come from.
+entry:
+ call void @foo() [ "deopt"() ]
+ %res = load i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*)
+ ret i8 %res
+}
+
+define i8 @test2(i8 addrspace(1)* %p) gc "statepoint-example" {
+; CHECK-LABEL: @test2
+; CHECK: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: icmp
+; Next test (@test3): globals don't move and thus don't get relocated
+entry:
+ call void @foo() [ "deopt"() ]
+ %cmp = icmp eq i8 addrspace(1)* %p, null
+ br i1 %cmp, label %taken, label %not_taken
+
+taken: ; preds = %not_taken, %entry
+ ret i8 0
+
+not_taken: ; preds = %entry
+ %cmp2 = icmp ne i8 addrspace(1)* %p, null
+ br i1 %cmp2, label %taken, label %dead
+
+dead: ; preds = %not_taken
+ %addr = getelementptr i8, i8 addrspace(1)* %p, i32 15
+ %res = load i8, i8 addrspace(1)* %addr
+ ret i8 %res
+}
+
+define i8 @test3(i1 %always_true) gc "statepoint-example" {
+; CHECK-LABEL: @test3
+; CHECK: gc.statepoint
+; CHECK-NEXT: load i8, i8 addrspace(1)* @G
+entry:
+ call void @foo() [ "deopt"() ]
+ %res = load i8, i8 addrspace(1)* @G, align 1
+ ret i8 %res
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll
new file mode 100644
index 000000000000..f04c6784a878
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll
@@ -0,0 +1,104 @@
+; RUN: opt -S -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles < %s | FileCheck %s
+
+; CHECK: declare i8 addrspace(1)* @some_function_ret_deref()
+; CHECK: define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* %a)
+; CHECK: define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* %a)
+; CHECK: define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* %a)
+
+declare void @foo()
+
+declare i8 addrspace(1)* @some_function() "gc-leaf-function"
+
+declare void @some_function_consumer(i8 addrspace(1)*) "gc-leaf-function"
+
+declare dereferenceable(4) i8 addrspace(1)* @some_function_ret_deref() "gc-leaf-function"
+declare noalias i8 addrspace(1)* @some_function_ret_noalias() "gc-leaf-function"
+
+define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* dereferenceable(4) %a) gc "statepoint-example" {
+entry:
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* dereferenceable_or_null(4) %a) gc "statepoint-example" {
+entry:
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* noalias %a) gc "statepoint-example" {
+entry:
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_deref_retval() gc "statepoint-example" {
+; CHECK-LABEL: @test_deref_retval(
+; CHECK: %a = call i8 addrspace(1)* @some_function()
+entry:
+ %a = call dereferenceable(4) i8 addrspace(1)* @some_function()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_deref_or_null_retval() gc "statepoint-example" {
+; CHECK-LABEL: @test_deref_or_null_retval(
+; CHECK: %a = call i8 addrspace(1)* @some_function()
+entry:
+ %a = call dereferenceable_or_null(4) i8 addrspace(1)* @some_function()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_noalias_retval() gc "statepoint-example" {
+; CHECK-LABEL: @test_noalias_retval(
+; CHECK: %a = call i8 addrspace(1)* @some_function()
+entry:
+ %a = call noalias i8 addrspace(1)* @some_function()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 @test_md(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_md(
+; CHECK: %tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
+entry:
+ %tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 %tmp
+}
+
+define i8 addrspace(1)* @test_decl_only_attribute(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_decl_only_attribute(
+; No change here, but the prototype of some_function_ret_deref should have changed.
+; CHECK: call i8 addrspace(1)* @some_function_ret_deref()
+entry:
+ %a = call i8 addrspace(1)* @some_function_ret_deref()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_decl_only_noalias(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_decl_only_noalias(
+; No change here, but the prototype of some_function_ret_noalias should have changed.
+; CHECK: call i8 addrspace(1)* @some_function_ret_noalias()
+entry:
+ %a = call i8 addrspace(1)* @some_function_ret_noalias()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_callsite_arg_attribute(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_callsite_arg_attribute(
+; CHECK: call void @some_function_consumer(i8 addrspace(1)* %ptr)
+; CHECK: !0 = !{!1, !1, i64 0}
+; CHECK: !1 = !{!"red", !2}
+; CHECK: !2 = !{!"blue"}
+entry:
+ call void @some_function_consumer(i8 addrspace(1)* dereferenceable(4) noalias %ptr)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %ptr
+}
+!0 = !{!1, !1, i64 0, i64 1}
+!1 = !{!"red", !2}
+!2 = !{!"blue"}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll
new file mode 100644
index 000000000000..0d53af704df2
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll
@@ -0,0 +1,22 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S 2>&1 | FileCheck %s
+
+; This test is to verify gc.relocate can handle pointer to vector of
+; pointers (<2 x i32 addrspace(1)*> addrspace(1)* in this case).
+; The old scheme to create a gc.relocate of <2 x i32 addrspace(1)*> addrspace(1)*
+; type will fail because llvm does not support mangling vector of pointers.
+; The new scheme will create all gc.relocate to i8 addrspace(1)* type and
+; then bitcast to the correct type.
+
+declare void @foo()
+
+declare void @use(...) "gc-leaf-function"
+
+define void @test1(<2 x i32 addrspace(1)*> addrspace(1)* %obj) gc "statepoint-example" {
+entry:
+; CHECK: %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %statepoint_token, i32 7, i32 7)
+; CHECK-NEXT: %obj.relocated.casted = bitcast i8 addrspace(1)* %obj.relocated to <2 x i32 addrspace(1)*> addrspace(1)*
+
+ call void @foo() [ "deopt"() ]
+ call void (...) @use(<2 x i32 addrspace(1)*> addrspace(1)* %obj)
+ ret void
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll
new file mode 100644
index 000000000000..00f28938cee9
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll
@@ -0,0 +1,149 @@
+; Test that we can correctly handle vectors of pointers in statepoint
+; rewriting. Currently, we scalarize, but that's an implementation detail.
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S | FileCheck %s
+
+; A non-vector relocation for comparison
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: test
+; CHECK: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated.casted
+; A base vector from an argument
+entry:
+ call void @do_safepoint() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+}
+
+define <2 x i64 addrspace(1)*> @test2(<2 x i64 addrspace(1)*> %obj) gc "statepoint-example" {
+; CHECK-LABEL: test2
+; CHECK: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
+; A base vector from a load
+entry:
+ call void @do_safepoint() [ "deopt"() ]
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+define <2 x i64 addrspace(1)*> @test3(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test3
+; CHECK: load
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
+; When a statepoint is an invoke rather than a call
+entry:
+ %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ call void @do_safepoint() [ "deopt"() ]
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+declare i32 @fake_personality_function()
+
+define <2 x i64 addrspace(1)*> @test4(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" personality i32 ()* @fake_personality_function {
+; CHECK-LABEL: test4
+; CHECK: load
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+entry:
+ %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ invoke void @do_safepoint() [ "deopt"() ]
+ to label %normal_return unwind label %exceptional_return
+
+normal_return: ; preds = %entry
+; CHECK-LABEL: normal_return:
+; CHECK: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
+ ret <2 x i64 addrspace(1)*> %obj
+
+exceptional_return: ; preds = %entry
+; CHECK-LABEL: exceptional_return:
+; CHECK: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %13
+; Can we handle an insert element with a constant offset? This effectively
+; tests both the equal and unequal case since we have to relocate both indices
+; in the vector.
+ %landing_pad4 = landingpad token
+ cleanup
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+define <2 x i64 addrspace(1)*> @test5(i64 addrspace(1)* %p) gc "statepoint-example" {
+; CHECK-LABEL: test5
+; CHECK: insertelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
+; A base vector from a load
+entry:
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %p, i32 0
+ call void @do_safepoint() [ "deopt"() ]
+ ret <2 x i64 addrspace(1)*> %vec
+}
+
+define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test6
+entry:
+ br i1 %cnd, label %taken, label %untaken
+
+taken: ; preds = %entry
+ %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+
+untaken: ; preds = %entry
+ %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+; CHECK-LABEL: merge:
+; CHECK-NEXT: = phi
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: extractelement
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
+ %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
+ call void @do_safepoint() [ "deopt"() ]
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll
new file mode 100644
index 000000000000..c5b213f4c82d
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll
@@ -0,0 +1,165 @@
+; A collection of liveness test cases to ensure we're reporting the
+; correct live values at statepoints
+; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-rematerialization-threshold=0 -S < %s | FileCheck %s
+
+; Tests to make sure we consider %obj live in both the taken and untaken
+; predecessor of merge.
+
+define i64 addrspace(1)* @test1(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test1
+entry:
+ br i1 %cmp, label %taken, label %untaken
+
+taken: ; preds = %entry
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: br label %merge
+ call void @foo() [ "deopt"() ]
+ br label %merge
+
+untaken: ; preds = %entry
+; CHECK-LABEL: untaken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated2 = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: br label %merge
+ call void @foo() [ "deopt"() ]
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %.0 = phi i64 addrspace(1)* [ %obj.relocated.casted, %taken ], [ %obj.relocated2.casted, %untaken ]
+; CHECK-NEXT: ret i64 addrspace(1)* %.0
+; A local kill should not affect liveness in predecessor block
+ ret i64 addrspace(1)* %obj
+}
+
+define i64 addrspace(1)* @test2(i1 %cmp, i64 addrspace(1)** %loc) gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: br
+ call void @foo() [ "deopt"() ]
+ br i1 %cmp, label %taken, label %untaken
+
+taken: ; preds = %entry
+; CHECK-LABEL: taken:
+; CHECK-NEXT: %obj = load
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated.casted
+; A local kill should affect values live from a successor phi. Also, we
+; should only propagate liveness from a phi to the appropriate predecessors.
+ %obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
+ call void @foo() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+
+untaken: ; preds = %entry
+ ret i64 addrspace(1)* null
+}
+
+define i64 addrspace(1)* @test3(i1 %cmp, i64 addrspace(1)** %loc) gc "statepoint-example" {
+; CHECK-LABEL: @test3
+entry:
+ br i1 %cmp, label %taken, label %untaken
+
+taken: ; preds = %entry
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj = load
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: br label %merge
+ call void @foo() [ "deopt"() ]
+ %obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
+ call void @foo() [ "deopt"() ]
+ br label %merge
+
+untaken: ; preds = %entry
+; CHECK-LABEL: untaken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: br label %merge
+; A base pointer must be live if it is needed at a later statepoint,
+; even if the base pointer is otherwise unused.
+ call void @foo() [ "deopt"() ]
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+ %phi = phi i64 addrspace(1)* [ %obj, %taken ], [ null, %untaken ]
+ ret i64 addrspace(1)* %phi
+}
+
+define i64 addrspace(1)* @test4(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test4
+entry:
+; CHECK-LABEL: entry:
+; CHECK-NEXT: %derived = getelementptr
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %derived.relocated =
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: %obj.relocated =
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %derived.relocated2 =
+; CHECK-NEXT: bitcast
+
+; Note: It's legal to relocate obj again, but not strictly needed
+; CHECK-NEXT: %obj.relocated3 =
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret i64 addrspace(1)* %derived.relocated2.casted
+;
+; Make sure that a phi def visited during iteration is considered a kill.
+; Also, liveness after base pointer analysis can change based on new uses,
+; not just new defs.
+ %derived = getelementptr i64, i64 addrspace(1)* %obj, i64 8
+ call void @foo() [ "deopt"() ]
+ call void @foo() [ "deopt"() ]
+ ret i64 addrspace(1)* %derived
+}
+
+declare void @consume(...) readonly "gc-leaf-function"
+
+define i64 addrspace(1)* @test5(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test5
+entry:
+ br i1 %cmp, label %taken, label %untaken
+
+taken: ; preds = %entry
+; CHECK-LABEL: taken:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: br label %merge
+ call void @foo() [ "deopt"() ]
+ br label %merge
+
+untaken: ; preds = %entry
+; CHECK-LABEL: untaken:
+; CHECK-NEXT: br label %merge
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+; CHECK-LABEL: merge:
+; CHECK-NEXT: %.0 = phi i64 addrspace(1)*
+; CHECK-NEXT: %obj2a = phi
+; CHECK-NEXT: @consume
+; CHECK-NEXT: br label %final
+ %obj2a = phi i64 addrspace(1)* [ %obj, %taken ], [ null, %untaken ]
+ call void (...) @consume(i64 addrspace(1)* %obj2a)
+ br label %final
+
+final: ; preds = %merge
+; CHECK-LABEL: final:
+; CHECK-NEXT: @consume
+; CHECK-NEXT: ret i64 addrspace(1)* %.0
+ call void (...) @consume(i64 addrspace(1)* %obj2a)
+ ret i64 addrspace(1)* %obj
+}
+
+declare void @foo()
+
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll
new file mode 100644
index 000000000000..8f5c0ff4a710
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles < %s | FileCheck %s
+
+declare void @f()
+declare i32 @personality_function()
+
+define void @test_id() gc "statepoint-example" personality i32 ()* @personality_function {
+; CHECK-LABEL: @test_id(
+entry:
+; CHECK-LABEL: entry:
+; CHECK: invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 100, i32 0, void ()* @f
+ invoke void @f() "statepoint-id"="100" to label %normal_return unwind label %exceptional_return
+
+normal_return:
+ ret void
+
+exceptional_return:
+ %landing_pad4 = landingpad {i8*, i32} cleanup
+ ret void
+}
+
+define void @test_num_patch_bytes() gc "statepoint-example" personality i32 ()* @personality_function {
+; CHECK-LABEL: @test_num_patch_bytes(
+entry:
+; CHECK-LABEL: entry:
+; CHECK: invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 99, void ()* @f,
+ invoke void @f() "statepoint-num-patch-bytes"="99" to label %normal_return unwind label %exceptional_return
+
+normal_return:
+ ret void
+
+exceptional_return:
+ %landing_pad4 = landingpad {i8*, i32} cleanup
+ ret void
+}
+
+declare void @do_safepoint()
+define void @gc.safepoint_poll() {
+entry:
+ call void @do_safepoint()
+ ret void
+}
+
+; CHECK-NOT: statepoint-id
+; CHECK-NOT: statepoint-num-patch-bytes
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll
new file mode 100644
index 000000000000..e0bd542aa5d5
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll
@@ -0,0 +1,62 @@
+; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S < %s | FileCheck %s
+
+; Test to make sure we destroy LCSSA's single entry phi nodes before
+; running liveness
+
+declare void @consume(...) "gc-leaf-function"
+
+define void @test6(i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test6
+entry:
+ br label %next
+
+next: ; preds = %entry
+; CHECK-LABEL: next:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated.casted)
+; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated.casted)
+; Need to delete unreachable gc.statepoint call
+ %obj2 = phi i64 addrspace(1)* [ %obj, %entry ]
+ call void @foo() [ "deopt"() ]
+ call void (...) @consume(i64 addrspace(1)* %obj2)
+ call void (...) @consume(i64 addrspace(1)* %obj)
+ ret void
+}
+
+define void @test7() gc "statepoint-example" {
+; CHECK-LABEL: test7
+; CHECK-NOT: gc.statepoint
+; Need to delete unreachable gc.statepoint invoke - tested separately given
+; a correct implementation could only remove the instructions, not the block
+ ret void
+
+unreached: ; preds = %unreached
+ %obj = phi i64 addrspace(1)* [ null, %unreached ]
+ call void @foo() [ "deopt"() ]
+ call void (...) @consume(i64 addrspace(1)* %obj)
+ br label %unreached
+}
+
+define void @test8() gc "statepoint-example" personality i32 ()* undef {
+; CHECK-LABEL: test8
+; CHECK-NOT: gc.statepoint
+; Bound the last check-not
+ ret void
+
+unreached: ; No predecessors!
+ invoke void @foo() [ "deopt"() ]
+; CHECK-LABEL: @foo
+ to label %normal_return unwind label %exceptional_return
+
+normal_return: ; preds = %unreached
+ ret void
+
+exceptional_return: ; preds = %unreached
+ %landing_pad4 = landingpad { i8*, i32 }
+ cleanup
+ ret void
+}
+
+declare void @foo()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll
new file mode 100644
index 000000000000..688cf36168d4
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll
@@ -0,0 +1,32 @@
+
+;; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -verify -S < %s | FileCheck %s
+;; This test is to verify that RewriteStatepointsForGC correctly relocates values
+;; defined by invoke instruction results.
+
+declare i64* addrspace(1)* @non_gc_call() "gc-leaf-function"
+
+declare void @gc_call()
+
+declare i32* @fake_personality_function()
+
+define i64* addrspace(1)* @test() gc "statepoint-example" personality i32* ()* @fake_personality_function {
+; CHECK-LABEL: @test(
+
+entry:
+ %obj = invoke i64* addrspace(1)* @non_gc_call()
+ to label %normal_dest unwind label %unwind_dest
+
+unwind_dest: ; preds = %entry
+ %lpad = landingpad { i8*, i32 }
+ cleanup
+ resume { i8*, i32 } undef
+
+normal_dest: ; preds = %entry
+; CHECK: normal_dest:
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: bitcast
+
+ call void @gc_call() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i64* addrspace(1)* %obj
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll
new file mode 100644
index 000000000000..584dc32b7529
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll
@@ -0,0 +1,279 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-rematerialization-threshold=0 -S 2>&1 | FileCheck %s
+
+
+declare void @foo()
+
+declare void @use(...) "gc-leaf-function"
+
+define i64 addrspace(1)* @test1(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2, i1 %condition) gc "statepoint-example" {
+; CHECK-LABEL: @test1
+; CHECK-DAG: %obj.relocated
+; CHECK-DAG: %obj2.relocated
+entry:
+ call void @foo() [ "deopt"() ]
+ br label %joint
+
+joint: ; preds = %joint2, %entry
+; CHECK-LABEL: joint:
+; CHECK: %phi1 = phi i64 addrspace(1)* [ %obj.relocated.casted, %entry ], [ %obj3, %joint2 ]
+ %phi1 = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj3, %joint2 ]
+ br i1 %condition, label %use, label %joint2
+
+use: ; preds = %joint
+ br label %joint2
+
+joint2: ; preds = %use, %joint
+; CHECK-LABEL: joint2:
+; CHECK: %phi2 = phi i64 addrspace(1)* [ %obj.relocated.casted, %use ], [ %obj2.relocated.casted, %joint ]
+; CHECK: %obj3 = getelementptr i64, i64 addrspace(1)* %obj2.relocated.casted, i32 1
+ %phi2 = phi i64 addrspace(1)* [ %obj, %use ], [ %obj2, %joint ]
+ %obj3 = getelementptr i64, i64 addrspace(1)* %obj2, i32 1
+ br label %joint
+}
+
+declare i64 addrspace(1)* @generate_obj() "gc-leaf-function"
+
+declare void @consume_obj(i64 addrspace(1)*) "gc-leaf-function"
+
+declare i1 @rt() "gc-leaf-function"
+
+define void @test2() gc "statepoint-example" {
+; CHECK-LABEL: @test2
+entry:
+ %obj_init = call i64 addrspace(1)* @generate_obj()
+ %obj = getelementptr i64, i64 addrspace(1)* %obj_init, i32 42
+ br label %loop
+
+loop: ; preds = %loop.backedge, %entry
+; CHECK: loop:
+; CHECK-DAG: [ %obj_init.relocated.casted, %loop.backedge ]
+; CHECK-DAG: [ %obj_init, %entry ]
+; CHECK-DAG: [ %obj.relocated.casted, %loop.backedge ]
+; CHECK-DAG: [ %obj, %entry ]
+; CHECK-NOT: %location = getelementptr i64, i64 addrspace(1)* %obj, i32 %index
+ %index = phi i32 [ 0, %entry ], [ %index.inc, %loop.backedge ]
+ %location = getelementptr i64, i64 addrspace(1)* %obj, i32 %index
+ call void @consume_obj(i64 addrspace(1)* %location)
+ %index.inc = add i32 %index, 1
+ %condition = call i1 @rt()
+ br i1 %condition, label %loop_x, label %loop_y
+
+loop_x: ; preds = %loop
+ br label %loop.backedge
+
+loop.backedge: ; preds = %loop_y, %loop_x
+ call void @do_safepoint() [ "deopt"() ]
+ br label %loop
+
+loop_y: ; preds = %loop
+ br label %loop.backedge
+}
+
+declare void @some_call(i8 addrspace(1)*) "gc-leaf-function"
+
+define void @relocate_merge(i1 %cnd, i8 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: @relocate_merge
+
+bci_0:
+ br i1 %cnd, label %if_branch, label %else_branch
+
+if_branch: ; preds = %bci_0
+; CHECK-LABEL: if_branch:
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+ call void @foo() [ "deopt"() ]
+ br label %join
+
+else_branch: ; preds = %bci_0
+; CHECK-LABEL: else_branch:
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+; We need to end up with a single relocation phi updated from both paths
+ call void @foo() [ "deopt"() ]
+ br label %join
+
+join: ; preds = %else_branch, %if_branch
+; CHECK-LABEL: join:
+; CHECK: phi i8 addrspace(1)*
+; CHECK-DAG: [ %arg.relocated, %if_branch ]
+; CHECK-DAG: [ %arg.relocated2, %else_branch ]
+; CHECK-NOT: phi
+ call void @some_call(i8 addrspace(1)* %arg)
+ ret void
+}
+
+; Make sure a use in a statepoint gets properly relocated at a previous one.
+; This is basically just making sure that statepoints aren't accidentally
+; treated specially.
+define void @test3(i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: @test3
+; CHECK: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.statepoint
+entry:
+ call void undef(i64 undef) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ %0 = call i32 undef(i64 addrspace(1)* %obj) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret void
+}
+
+; Check specifically for the case where the result of a statepoint needs to
+; be relocated itself
+define void @test4() gc "statepoint-example" {
+; CHECK-LABEL: @test4
+; CHECK: gc.statepoint
+; CHECK: gc.result
+; CHECK: gc.statepoint
+; CHECK: [[RELOCATED:%[^ ]+]] = call {{.*}}gc.relocate
+; CHECK: @use(i8 addrspace(1)* [[RELOCATED]])
+ %1 = call i8 addrspace(1)* undef() [ "deopt"() ]
+ %2 = call i8 addrspace(1)* undef() [ "deopt"() ]
+ call void (...) @use(i8 addrspace(1)* %1)
+ unreachable
+}
+
+; Test updating a phi where not all inputs are live to begin with
+define void @test5(i8 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: test5
+entry:
+ %0 = call i8 addrspace(1)* undef() [ "deopt"() ]
+ switch i32 undef, label %kill [
+ i32 10, label %merge
+ i32 13, label %merge
+ ]
+
+kill: ; preds = %entry
+ br label %merge
+
+merge: ; preds = %kill, %entry, %entry
+; CHECK: merge:
+; CHECK: %test = phi i8 addrspace(1)
+; CHECK-DAG: [ null, %kill ]
+; CHECK-DAG: [ %arg.relocated, %entry ]
+; CHECK-DAG: [ %arg.relocated, %entry ]
+ %test = phi i8 addrspace(1)* [ null, %kill ], [ %arg, %entry ], [ %arg, %entry ]
+ call void (...) @use(i8 addrspace(1)* %test)
+ unreachable
+}
+
+; Check to make sure we handle values live over an entry statepoint
+define void @test6(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3) gc "statepoint-example" {
+; CHECK-LABEL: @test6
+entry:
+ br i1 undef, label %gc.safepoint_poll.exit2, label %do_safepoint
+
+do_safepoint: ; preds = %entry
+; CHECK-LABEL: do_safepoint:
+; CHECK: gc.statepoint
+; CHECK: arg1.relocated =
+; CHECK: arg2.relocated =
+; CHECK: arg3.relocated =
+ call void @foo() [ "deopt"(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3) ]
+ br label %gc.safepoint_poll.exit2
+
+gc.safepoint_poll.exit2: ; preds = %do_safepoint, %entry
+; CHECK-LABEL: gc.safepoint_poll.exit2:
+; CHECK: phi i8 addrspace(1)*
+; CHECK-DAG: [ %arg3, %entry ]
+; CHECK-DAG: [ %arg3.relocated, %do_safepoint ]
+; CHECK: phi i8 addrspace(1)*
+; CHECK-DAG: [ %arg2, %entry ]
+; CHECK-DAG: [ %arg2.relocated, %do_safepoint ]
+; CHECK: phi i8 addrspace(1)*
+; CHECK-DAG: [ %arg1, %entry ]
+; CHECK-DAG: [ %arg1.relocated, %do_safepoint ]
+ call void (...) @use(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3)
+ ret void
+}
+
+; Check relocation in a loop nest where a relocation happens in the outer
+; but not the inner loop
+define void @test_outer_loop(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i1 %cmp) gc "statepoint-example" {
+; CHECK-LABEL: @test_outer_loop
+
+bci_0:
+ br label %outer-loop
+
+outer-loop: ; preds = %outer-inc, %bci_0
+; CHECK-LABEL: outer-loop:
+; CHECK: phi i8 addrspace(1)* [ %arg2, %bci_0 ], [ %arg2.relocated, %outer-inc ]
+; CHECK: phi i8 addrspace(1)* [ %arg1, %bci_0 ], [ %arg1.relocated, %outer-inc ]
+ br label %inner-loop
+
+inner-loop: ; preds = %inner-loop, %outer-loop
+ br i1 %cmp, label %inner-loop, label %outer-inc
+
+outer-inc: ; preds = %inner-loop
+; CHECK-LABEL: outer-inc:
+; CHECK: %arg1.relocated
+; CHECK: %arg2.relocated
+ call void @foo() [ "deopt"(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2) ]
+ br label %outer-loop
+}
+
+; Check that both inner and outer loops get phis when relocation is in
+; inner loop
+define void @test_inner_loop(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i1 %cmp) gc "statepoint-example" {
+; CHECK-LABEL: @test_inner_loop
+
+bci_0:
+ br label %outer-loop
+
+outer-loop: ; preds = %outer-inc, %bci_0
+; CHECK-LABEL: outer-loop:
+; CHECK: phi i8 addrspace(1)* [ %arg2, %bci_0 ], [ %arg2.relocated, %outer-inc ]
+; CHECK: phi i8 addrspace(1)* [ %arg1, %bci_0 ], [ %arg1.relocated, %outer-inc ]
+ br label %inner-loop
+; CHECK-LABEL: inner-loop
+; CHECK: phi i8 addrspace(1)*
+; CHECK-DAG: %outer-loop ]
+; CHECK-DAG: [ %arg2.relocated, %inner-loop ]
+; CHECK: phi i8 addrspace(1)*
+; CHECK-DAG: %outer-loop ]
+; CHECK-DAG: [ %arg1.relocated, %inner-loop ]
+; CHECK: gc.statepoint
+; CHECK: %arg1.relocated
+; CHECK: %arg2.relocated
+
+inner-loop: ; preds = %inner-loop, %outer-loop
+ call void @foo() [ "deopt"(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2) ]
+ br i1 %cmp, label %inner-loop, label %outer-inc
+
+outer-inc: ; preds = %inner-loop
+; CHECK-LABEL: outer-inc:
+; This test shows why updating just those uses of the original value being
+; relocated dominated by the inserted relocation is not always sufficient.
+ br label %outer-loop
+}
+
+define i64 addrspace(1)* @test7(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2, i1 %condition) gc "statepoint-example" {
+; CHECK-LABEL: @test7
+entry:
+ br i1 %condition, label %branch2, label %join
+
+branch2: ; preds = %entry
+ br i1 %condition, label %callbb, label %join2
+
+callbb: ; preds = %branch2
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ br label %join
+
+join: ; preds = %callbb, %entry
+; CHECK-LABEL: join:
+; CHECK: phi i64 addrspace(1)* [ %obj.relocated.casted, %callbb ], [ %obj, %entry ]
+; CHECK: phi i64 addrspace(1)*
+; CHECK-DAG: [ %obj, %entry ]
+; CHECK-DAG: [ %obj2.relocated.casted, %callbb ]
+ %phi1 = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %callbb ]
+ br label %join2
+
+join2: ; preds = %join, %branch2
+; CHECK-LABEL: join2:
+; CHECK: phi2 = phi i64 addrspace(1)*
+; CHECK-DAG: %join ]
+; CHECK-DAG: [ %obj2, %branch2 ]
+ %phi2 = phi i64 addrspace(1)* [ %obj, %join ], [ %obj2, %branch2 ]
+ ret i64 addrspace(1)* %phi2
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll
new file mode 100644
index 000000000000..0020c5116c13
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll
@@ -0,0 +1,150 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S 2>&1 | FileCheck %s
+
+
+declare void @use_obj16(i16 addrspace(1)*) "gc-leaf-function"
+declare void @use_obj32(i32 addrspace(1)*) "gc-leaf-function"
+declare void @use_obj64(i64 addrspace(1)*) "gc-leaf-function"
+
+declare void @do_safepoint()
+
+define void @test_gep_const(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_gep_const
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %base)
+ call void @use_obj32(i32 addrspace(1)* %ptr)
+ ret void
+}
+
+define void @test_gep_idx(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-example" {
+; CHECK-LABEL: test_gep_idx
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 %idx
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %base)
+ call void @use_obj32(i32 addrspace(1)* %ptr)
+ ret void
+}
+
+define void @test_bitcast(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_bitcast
+entry:
+ %ptr = bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %base)
+ call void @use_obj64(i64 addrspace(1)* %ptr)
+ ret void
+}
+
+define void @test_bitcast_gep(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_bitcast_gep
+entry:
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %base)
+ call void @use_obj64(i64 addrspace(1)* %ptr.cast)
+ ret void
+}
+
+define void @test_intersecting_chains(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-example" {
+; CHECK-LABEL: test_intersecting_chains
+entry:
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
+ %ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)*
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj64(i64 addrspace(1)* %ptr.cast)
+ call void @use_obj16(i16 addrspace(1)* %ptr.cast2)
+ ret void
+}
+
+define void @test_cost_threshold(i32 addrspace(1)* %base, i32 %idx1, i32 %idx2, i32 %idx3) gc "statepoint-example" {
+; CHECK-LABEL: test_cost_threshold
+entry:
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr.gep2 = getelementptr i32, i32 addrspace(1)* %ptr.gep, i32 %idx1
+ %ptr.gep3 = getelementptr i32, i32 addrspace(1)* %ptr.gep2, i32 %idx2
+ %ptr.gep4 = getelementptr i32, i32 addrspace(1)* %ptr.gep3, i32 %idx3
+ %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep4 to i64 addrspace(1)*
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj64(i64 addrspace(1)* %ptr.cast)
+ ret void
+}
+
+define void @test_two_derived(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_two_derived
+entry:
+ %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr2 = getelementptr i32, i32 addrspace(1)* %base, i32 12
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %ptr)
+ call void @use_obj32(i32 addrspace(1)* %ptr2)
+ ret void
+}
+
+define void @test_gep_smallint_array([3 x i32] addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_gep_smallint_array
+entry:
+ %ptr = getelementptr [3 x i32], [3 x i32] addrspace(1)* %base, i32 0, i32 2
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %ptr)
+ ret void
+}
+
+declare i32 @fake_personality_function()
+
+define void @test_invoke(i32 addrspace(1)* %base) gc "statepoint-example" personality i32 ()* @fake_personality_function {
+; CHECK-LABEL: test_invoke
+entry:
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
+ %ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)*
+ invoke void @do_safepoint() [ "deopt"() ]
+ to label %normal unwind label %exception
+
+normal: ; preds = %entry
+ call void @use_obj64(i64 addrspace(1)* %ptr.cast)
+ call void @use_obj16(i16 addrspace(1)* %ptr.cast2)
+ ret void
+
+exception: ; preds = %entry
+ %landing_pad4 = landingpad token
+ cleanup
+ call void @use_obj64(i64 addrspace(1)* %ptr.cast)
+ call void @use_obj16(i16 addrspace(1)* %ptr.cast2)
+ ret void
+}
+
+define void @test_loop(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_loop
+entry:
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ call void @use_obj32(i32 addrspace(1)* %ptr.gep)
+ call void @do_safepoint() [ "deopt"() ]
+ br label %loop
+}
+
+define void @test_too_long(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_too_long
+entry:
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr.gep1 = getelementptr i32, i32 addrspace(1)* %ptr.gep, i32 15
+ %ptr.gep2 = getelementptr i32, i32 addrspace(1)* %ptr.gep1, i32 15
+ %ptr.gep3 = getelementptr i32, i32 addrspace(1)* %ptr.gep2, i32 15
+ %ptr.gep4 = getelementptr i32, i32 addrspace(1)* %ptr.gep3, i32 15
+ %ptr.gep5 = getelementptr i32, i32 addrspace(1)* %ptr.gep4, i32 15
+ %ptr.gep6 = getelementptr i32, i32 addrspace(1)* %ptr.gep5, i32 15
+ %ptr.gep7 = getelementptr i32, i32 addrspace(1)* %ptr.gep6, i32 15
+ %ptr.gep8 = getelementptr i32, i32 addrspace(1)* %ptr.gep7, i32 15
+ %ptr.gep9 = getelementptr i32, i32 addrspace(1)* %ptr.gep8, i32 15
+ %ptr.gep10 = getelementptr i32, i32 addrspace(1)* %ptr.gep9, i32 15
+ %ptr.gep11 = getelementptr i32, i32 addrspace(1)* %ptr.gep10, i32 15
+ call void @do_safepoint() [ "deopt"() ]
+ call void @use_obj32(i32 addrspace(1)* %ptr.gep11)
+ ret void
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll
new file mode 100644
index 000000000000..e1d0140c1dcd
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll
@@ -0,0 +1,32 @@
+; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -verify -S < %s | FileCheck %s
+
+declare i8 addrspace(1)* @gc_call()
+
+declare i32* @fake_personality_function()
+
+define i8 addrspace(1)* @test(i1 %c) gc "statepoint-example" personality i32* ()* @fake_personality_function {
+; CHECK-LABEL: @test(
+entry:
+ br i1 %c, label %gc_invoke, label %normal_dest
+
+gc_invoke:
+; CHECK: [[TOKEN:%[^ ]+]] = invoke token {{[^@]+}}@llvm.experimental.gc.statepoint{{[^@]+}}@gc_call
+ %obj = invoke i8 addrspace(1)* @gc_call() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ to label %normal_dest unwind label %unwind_dest
+
+unwind_dest:
+; CHECK: unwind_dest:
+ %lpad = landingpad { i8*, i32 }
+ cleanup
+ resume { i8*, i32 } undef
+
+; CHECK: [[NORMAL_DEST_SPLIT:[^:]+:]]
+; CHECK-NEXT: [[RET_VAL:%[^ ]+]] = call i8 addrspace(1)* @llvm.experimental.gc.result.p1i8(token [[TOKEN]])
+; CHECK-NEXT: br label %normal_dest
+
+normal_dest:
+; CHECK: normal_dest:
+; CHECK-NEXT: %merge = phi i8 addrspace(1)* [ null, %entry ], [ %obj2, %normal_dest1 ]
+ %merge = phi i8 addrspace(1)* [ null, %entry ], [ %obj, %gc_invoke ]
+ ret i8 addrspace(1)* %merge
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll
index 5913db21fcf3..b4954f6a9b60 100644
--- a/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll
+++ b/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll
@@ -5,18 +5,20 @@ declare i8 addrspace(1)* @some_function()
declare void @some_function_consumer(i8 addrspace(1)*)
declare dereferenceable(4) i8 addrspace(1)* @some_function_ret_deref()
; CHECK: declare i8 addrspace(1)* @some_function_ret_deref()
+declare noalias i8 addrspace(1)* @some_function_ret_noalias()
+; CHECK: declare i8 addrspace(1)* @some_function_ret_noalias()
define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* dereferenceable(4) %a) gc "statepoint-example" {
; CHECK: define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* %a)
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %a
}
define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* dereferenceable_or_null(4) %a) gc "statepoint-example" {
; CHECK: define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* %a)
entry:
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %a
}
@@ -25,7 +27,7 @@ define i8 addrspace(1)* @test_deref_retval() gc "statepoint-example" {
entry:
%a = call dereferenceable(4) i8 addrspace(1)* @some_function()
; CHECK: %a = call i8 addrspace(1)* @some_function()
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %a
}
@@ -34,7 +36,7 @@ define i8 addrspace(1)* @test_deref_or_null_retval() gc "statepoint-example" {
entry:
%a = call dereferenceable_or_null(4) i8 addrspace(1)* @some_function()
; CHECK: %a = call i8 addrspace(1)* @some_function()
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %a
}
@@ -43,7 +45,7 @@ define i8 @test_md(i8 addrspace(1)* %ptr) gc "statepoint-example" {
entry:
; CHECK: %tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
%tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 %tmp
}
@@ -53,7 +55,7 @@ entry:
; No change here, but the prototype of some_function_ret_deref should have changed.
; CHECK: call i8 addrspace(1)* @some_function_ret_deref()
%a = call i8 addrspace(1)* @some_function_ret_deref()
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %a
}
@@ -62,11 +64,46 @@ define i8 addrspace(1)* @test_callsite_arg_attribute(i8 addrspace(1)* %ptr) gc "
entry:
; CHECK: call void @some_function_consumer(i8 addrspace(1)* %ptr)
call void @some_function_consumer(i8 addrspace(1)* dereferenceable(4) %ptr)
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %ptr
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* noalias %a) gc "statepoint-example" {
+; CHECK: define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* %a)
+entry:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_noalias_retval() gc "statepoint-example" {
+; CHECK-LABEL: @test_noalias_retval(
+entry:
+ %a = call noalias i8 addrspace(1)* @some_function()
+; CHECK: %a = call i8 addrspace(1)* @some_function()
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_decl_only_noalias(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_decl_only_noalias(
+entry:
+; No change here, but the prototype of some_function_ret_noalias should have changed.
+; CHECK: call i8 addrspace(1)* @some_function_ret_noalias()
+ %a = call i8 addrspace(1)* @some_function_ret_noalias()
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_callsite_arg_noalias(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_callsite_arg_noalias(
+entry:
+; CHECK: call void @some_function_consumer(i8 addrspace(1)* %ptr)
+ call void @some_function_consumer(i8 addrspace(1)* noalias %ptr)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ ret i8 addrspace(1)* %ptr
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
!0 = !{!1, !1, i64 0, i64 1}
!1 = !{!"red", !2}
diff --git a/test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll b/test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll
index 355ffa634f3c..3cd4bc65d1a5 100644
--- a/test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll
+++ b/test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll
@@ -8,12 +8,12 @@
declare void @foo()
declare void @use(...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
define void @test1(<2 x i32 addrspace(1)*> addrspace(1)* %obj) gc "statepoint-example" {
entry:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
-; CHECK: %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %safepoint_token, i32 7, i32 7)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+; CHECK: %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7)
; CHECK-NEXT: %obj.relocated.casted = bitcast i8 addrspace(1)* %obj.relocated to <2 x i32 addrspace(1)*> addrspace(1)*
call void (...) @use(<2 x i32 addrspace(1)*> addrspace(1)* %obj)
ret void
diff --git a/test/Transforms/RewriteStatepointsForGC/live-vector.ll b/test/Transforms/RewriteStatepointsForGC/live-vector.ll
index 26ad73737adc..584fd7add1b6 100644
--- a/test/Transforms/RewriteStatepointsForGC/live-vector.ll
+++ b/test/Transforms/RewriteStatepointsForGC/live-vector.ll
@@ -10,7 +10,7 @@ define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-NEXT: bitcast
; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated.casted
entry:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %obj
}
@@ -28,7 +28,7 @@ define <2 x i64 addrspace(1)*> @test2(<2 x i64 addrspace(1)*> %obj) gc "statepoi
; CHECK-NEXT: insertelement
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
entry:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
ret <2 x i64 addrspace(1)*> %obj
}
@@ -48,7 +48,7 @@ define <2 x i64 addrspace(1)*> @test3(<2 x i64 addrspace(1)*>* %ptr) gc "statepo
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
entry:
%obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
ret <2 x i64 addrspace(1)*> %obj
}
@@ -63,7 +63,7 @@ define <2 x i64 addrspace(1)*> @test4(<2 x i64 addrspace(1)*>* %ptr) gc "statepo
; CHECK-NEXT: gc.statepoint
entry:
%obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
to label %normal_return unwind label %exceptional_return
; CHECK-LABEL: normal_return:
@@ -86,7 +86,7 @@ normal_return: ; preds = %entry
; CHECK-NEXT: insertelement
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %14
exceptional_return: ; preds = %entry
- %landing_pad4 = landingpad { i8*, i32 }
+ %landing_pad4 = landingpad token
cleanup
ret <2 x i64 addrspace(1)*> %obj
}
@@ -110,7 +110,7 @@ define <2 x i64 addrspace(1)*> @test5(i64 addrspace(1)* %p)
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
entry:
%vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %p, i32 0
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
ret <2 x i64 addrspace(1)*> %vec
}
@@ -121,9 +121,6 @@ define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr)
; CHECK-LABEL: test6
; CHECK-LABEL: merge:
; CHECK-NEXT: = phi
-; CHECK-NEXT: = phi
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: gc.statepoint
@@ -131,12 +128,6 @@ define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr)
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
@@ -151,11 +142,11 @@ untaken:
merge:
%obj = phi <2 x i64 addrspace(1)*> [%obja, %taken], [%objb, %untaken]
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
ret <2 x i64 addrspace(1)*> %obj
}
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll b/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll
index 6bc4d5324494..207003c17b5f 100644
--- a/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll
+++ b/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll
@@ -16,7 +16,7 @@ taken:
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
; CHECK-NEXT: bitcast
; CHECK-NEXT: br label %merge
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %merge
untaken:
@@ -25,7 +25,7 @@ untaken:
; CHECK-NEXT: %obj.relocated1 = call coldcc i8 addrspace(1)*
; CHECK-NEXT: bitcast
; CHECK-NEXT: br label %merge
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %merge
merge:
@@ -42,7 +42,7 @@ entry:
; CHECK-LABEL: entry:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: br
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br i1 %cmp, label %taken, label %untaken
taken:
@@ -54,7 +54,7 @@ taken:
; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated.casted
%obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %obj
untaken:
@@ -76,16 +76,16 @@ taken:
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
; CHECK-NEXT: bitcast
; CHECK-NEXT: br label %merge
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
%obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %merge
untaken:
; CHECK-LABEL: taken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: br label %merge
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %merge
merge:
@@ -114,9 +114,9 @@ entry:
; CHECK-NEXT: ret i64 addrspace(1)* %derived.relocated1.casted
;
%derived = getelementptr i64, i64 addrspace(1)* %obj, i64 8
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %derived
}
@@ -136,7 +136,7 @@ taken:
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
; CHECK-NEXT: bitcast
; CHECK-NEXT: br label %merge
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %merge
untaken:
@@ -163,4 +163,4 @@ final:
declare void @foo()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/preprocess.ll b/test/Transforms/RewriteStatepointsForGC/preprocess.ll
index 012fff5c9e19..e1657497485b 100644
--- a/test/Transforms/RewriteStatepointsForGC/preprocess.ll
+++ b/test/Transforms/RewriteStatepointsForGC/preprocess.ll
@@ -17,7 +17,7 @@ next:
; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated.casted)
; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated.casted)
%obj2 = phi i64 addrspace(1)* [ %obj, %entry ]
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
call void (...) @consume(i64 addrspace(1)* %obj2)
call void (...) @consume(i64 addrspace(1)* %obj)
ret void
@@ -33,7 +33,7 @@ define void @test7() gc "statepoint-example" {
unreached:
%obj = phi i64 addrspace(1)* [null, %unreached]
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
call void (...) @consume(i64 addrspace(1)* %obj)
br label %unreached
}
@@ -46,7 +46,7 @@ define void @test8() gc "statepoint-example" personality i32 ()* undef {
ret void
unreached:
- invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
to label %normal_return unwind label %exceptional_return
normal_return: ; preds = %entry
@@ -62,4 +62,4 @@ declare void @foo()
; Bound the last check-not
; CHECK-LABEL: @foo
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll b/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll
index 1a5289b26656..d11441e9346f 100644
--- a/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll
+++ b/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll
@@ -25,9 +25,9 @@ normal_dest:
;; CHECK-NEXT: gc.statepoint
;; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
;; CHECK-NEXT: bitcast
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @gc_call, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @gc_call, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64* addrspace(1)* %obj
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/relocation.ll b/test/Transforms/RewriteStatepointsForGC/relocation.ll
index d7a84e5820c8..deea377c5a28 100644
--- a/test/Transforms/RewriteStatepointsForGC/relocation.ll
+++ b/test/Transforms/RewriteStatepointsForGC/relocation.ll
@@ -9,7 +9,7 @@ entry:
; CHECK-LABEL: @test1
; CHECK-DAG: %obj.relocated
; CHECK-DAG: %obj2.relocated
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %joint
joint:
@@ -61,7 +61,7 @@ loop_x:
br label %loop.backedge
loop.backedge:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
br label %loop
loop_y:
@@ -79,14 +79,14 @@ if_branch:
; CHECK-LABEL: if_branch:
; CHECK: gc.statepoint
; CHECK: gc.relocate
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %join
else_branch:
; CHECK-LABEL: else_branch:
; CHECK: gc.statepoint
; CHECK: gc.relocate
- %safepoint_token1 = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
br label %join
join:
@@ -110,8 +110,8 @@ entry:
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.statepoint
- %safepoint_token = call i32 (i64, i32, void (i64)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi64f(i64 0, i32 0, void (i64)* undef, i32 1, i32 0, i64 undef, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- %safepoint_token1 = call i32 (i64, i32, i32 (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32p1i64f(i64 0, i32 0, i32 (i64 addrspace(1)*)* undef, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void (i64)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi64f(i64 0, i32 0, void (i64)* undef, i32 1, i32 0, i64 undef, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token1 = call token (i64, i32, i32 (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32p1i64f(i64 0, i32 0, i32 (i64 addrspace(1)*)* undef, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret void
}
@@ -124,9 +124,9 @@ define void @test4() gc "statepoint-example" {
; CHECK: gc.statepoint
; CHECK: gc.relocate
; CHECK: @use(i8 addrspace(1)* %res.relocated)
- %safepoint_token2 = tail call i32 (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
- %res = call i8 addrspace(1)* @llvm.experimental.gc.result.ptr.p1i8(i32 %safepoint_token2)
- call i32 (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
+ %safepoint_token2 = tail call token (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
+ %res = call i8 addrspace(1)* @llvm.experimental.gc.result.p1i8(token %safepoint_token2)
+ call token (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
call void (...) @use(i8 addrspace(1)* %res)
unreachable
}
@@ -136,7 +136,7 @@ define void @test4() gc "statepoint-example" {
define void @test5(i8 addrspace(1)* %arg) gc "statepoint-example" {
; CHECK-LABEL: test5
entry:
- call i32 (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
+ call token (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
switch i32 undef, label %kill [
i32 10, label %merge
i32 13, label %merge
@@ -170,7 +170,7 @@ do_safepoint:
; CHECK: arg1.relocated =
; CHECK: arg2.relocated =
; CHECK: arg3.relocated =
- call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 3, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 3, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3)
br label %gc.safepoint_poll.exit2
gc.safepoint_poll.exit2:
@@ -209,7 +209,7 @@ outer-inc:
; CHECK-LABEL: outer-inc:
; CHECK: %arg1.relocated
; CHECK: %arg2.relocated
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
br label %outer-loop
}
@@ -232,13 +232,13 @@ inner-loop:
; CHECK: phi i8 addrspace(1)*
; CHECK-DAG: %outer-loop ]
; CHECK-DAG: [ %arg2.relocated, %inner-loop ]
-; CHECKL phi i8 addrspace(1)*
+; CHECK: phi i8 addrspace(1)*
; CHECK-DAG: %outer-loop ]
; CHECK-DAG: [ %arg1.relocated, %inner-loop ]
; CHECK: gc.statepoint
; CHECK: %arg1.relocated
; CHECK: %arg2.relocated
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
br i1 %cmp, label %inner-loop, label %outer-inc
outer-inc:
@@ -258,7 +258,7 @@ branch2:
br i1 %condition, label %callbb, label %join2
callbb:
- %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
br label %join
join:
@@ -285,11 +285,11 @@ join2:
declare void @do_safepoint()
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_p1i8f(i64, i32, i8 addrspace(1)* ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidi64f(i64, i32, void (i64)*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.statepoint.p0f_i32p1i64f(i64, i32, i32 (i64 addrspace(1)*)*, i32, i32, ...)
-declare i8 addrspace(1)* @llvm.experimental.gc.result.ptr.p1i8(i32) #3
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_p1i8f(i64, i32, i8 addrspace(1)* ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidi64f(i64, i32, void (i64)*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_i32p1i64f(i64, i32, i32 (i64 addrspace(1)*)*, i32, i32, ...)
+declare i8 addrspace(1)* @llvm.experimental.gc.result.p1i8(token) #3
diff --git a/test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll b/test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll
index f04e7c797cad..445ab7bd768d 100644
--- a/test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll
+++ b/test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll
@@ -10,8 +10,8 @@ define void @"test_gep_const"(i32 addrspace(1)* %base) gc "statepoint-example" {
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15
; CHECK: getelementptr i32, i32 addrspace(1)* %base, i32 15
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %sp, i32 7, i32 7)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
; CHECK: bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
; CHECK: getelementptr i32, i32 addrspace(1)* %base.relocated.casted, i32 15
call void @use_obj32(i32 addrspace(1)* %base)
@@ -24,8 +24,8 @@ define void @"test_gep_idx"(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-ex
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %base, i32 %idx
; CHECK: getelementptr
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %sp, i32 7, i32 7)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
; CHECK: getelementptr i32, i32 addrspace(1)* %base.relocated.casted, i32 %idx
call void @use_obj32(i32 addrspace(1)* %base)
@@ -38,8 +38,8 @@ define void @"test_bitcast"(i32 addrspace(1)* %base) gc "statepoint-example" {
entry:
%ptr = bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
; CHECK: bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %sp, i32 7, i32 7)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
; CHECK: bitcast i32 addrspace(1)* %base.relocated.casted to i64 addrspace(1)*
call void @use_obj32(i32 addrspace(1)* %base)
@@ -47,6 +47,40 @@ entry:
ret void
}
+define void @"test_bitcast_bitcast"(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_bitcast_bitcast
+entry:
+ %ptr1 = bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
+ %ptr2 = bitcast i64 addrspace(1)* %ptr1 to i16 addrspace(1)*
+ ; CHECK: bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
+ ; CHECK: bitcast i64 addrspace(1)* %ptr1 to i16 addrspace(1)*
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
+ ; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
+ ; CHECK: bitcast i32 addrspace(1)* %base.relocated.casted to i64 addrspace(1)*
+ ; CHECK: bitcast i64 addrspace(1)* %ptr1.remat to i16 addrspace(1)*
+ call void @use_obj32(i32 addrspace(1)* %base)
+ call void @use_obj16(i16 addrspace(1)* %ptr2)
+ ret void
+}
+
+define void @"test_addrspacecast_addrspacecast"(i32 addrspace(1)* %base) gc "statepoint-example" {
+; CHECK-LABEL: test_addrspacecast_addrspacecast
+entry:
+ %ptr1 = addrspacecast i32 addrspace(1)* %base to i32*
+ %ptr2 = addrspacecast i32* %ptr1 to i32 addrspace(1)*
+ ; CHECK: addrspacecast i32 addrspace(1)* %base to i32*
+ ; CHECK: addrspacecast i32* %ptr1 to i32 addrspace(1)*
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
+ ; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
+ ; CHECK: %ptr2.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 8)
+ ; CHECK: %ptr2.relocated.casted = bitcast i8 addrspace(1)* %ptr2.relocated to i32 addrspace(1)*
+ call void @use_obj32(i32 addrspace(1)* %base)
+ call void @use_obj32(i32 addrspace(1)* %ptr2)
+ ret void
+}
+
define void @"test_bitcast_gep"(i32 addrspace(1)* %base) gc "statepoint-example" {
; CHECK-LABEL: test_bitcast_gep
entry:
@@ -54,7 +88,7 @@ entry:
; CHECK: getelementptr
%ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
; CHECK: bitcast
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.relocate
; CHECK: bitcast
; CHECK: getelementptr
@@ -73,7 +107,7 @@ entry:
; CHECK: bitcast
%ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)*
; CHECK: bitcast
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: getelementptr
; CHECK: bitcast
; CHECK: getelementptr
@@ -96,7 +130,7 @@ entry:
; CHECK: getelementptr
%ptr.cast = bitcast i32 addrspace(1)* %ptr.gep4 to i64 addrspace(1)*
; CHECK: bitcast
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.relocate
; CHECK: bitcast
; CHECK: gc.relocate
@@ -112,7 +146,7 @@ entry:
%ptr2 = getelementptr i32, i32 addrspace(1)* %base, i32 12
; CHECK: getelementptr
; CHECK: getelementptr
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.relocate
; CHECK: bitcast
; CHECK: getelementptr
@@ -127,7 +161,7 @@ define void @"test_gep_smallint_array"([3 x i32] addrspace(1)* %base) gc "statep
entry:
%ptr = getelementptr [3 x i32], [3 x i32] addrspace(1)* %base, i32 0, i32 2
; CHECK: getelementptr
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.relocate
; CHECK: bitcast
; CHECK: getelementptr
@@ -146,7 +180,7 @@ entry:
; CHECK: bitcast
%ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)*
; CHECK: bitcast
- %sp = invoke i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
to label %normal unwind label %exception
normal:
@@ -163,7 +197,7 @@ normal:
exception:
; CHECK-LABEL: exception:
- %landing_pad4 = landingpad { i8*, i32 }
+ %landing_pad4 = landingpad token
cleanup
; CHECK: gc.relocate
; CHECK: bitcast
@@ -187,7 +221,7 @@ loop:
; CHECK: phi i32 addrspace(1)* [ %ptr.gep, %entry ], [ %ptr.gep.remat, %loop ]
; CHECK: phi i32 addrspace(1)* [ %base, %entry ], [ %base.relocated.casted, %loop ]
call void @use_obj32(i32 addrspace(1)* %ptr.gep)
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.relocate
; CHECK: bitcast
; CHECK: getelementptr
@@ -209,7 +243,7 @@ entry:
%ptr.gep9 = getelementptr i32, i32 addrspace(1)* %ptr.gep8, i32 15
%ptr.gep10 = getelementptr i32, i32 addrspace(1)* %ptr.gep9, i32 15
%ptr.gep11 = getelementptr i32, i32 addrspace(1)* %ptr.gep10, i32 15
- %sp = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.relocate
; CHECK: bitcast
; CHECK: gc.relocate
@@ -219,4 +253,4 @@ entry:
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/SCCP/global-alias-constprop.ll b/test/Transforms/SCCP/global-alias-constprop.ll
new file mode 100644
index 000000000000..be7e083e6a67
--- /dev/null
+++ b/test/Transforms/SCCP/global-alias-constprop.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -sccp -S | FileCheck %s
+
+@0 = private unnamed_addr constant [2 x i32] [i32 -1, i32 1]
+@"\01??_7A@@6B@" = unnamed_addr alias i32, getelementptr inbounds ([2 x i32], [2 x i32]* @0, i32 0, i32 1)
+
+; CHECK: ret i32 1
+
+define i32 @main() {
+ %a = load i32, i32* @"\01??_7A@@6B@"
+ ret i32 %a
+}
diff --git a/test/Transforms/SLPVectorizer/AArch64/commute.ll b/test/Transforms/SLPVectorizer/AArch64/commute.ll
index 1cff73d9f695..2bce59c62000 100644
--- a/test/Transforms/SLPVectorizer/AArch64/commute.ll
+++ b/test/Transforms/SLPVectorizer/AArch64/commute.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -slp-vectorizer %s | FileCheck %s
+; RUN: opt -S -slp-vectorizer %s -slp-threshold=-10 | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
diff --git a/test/Transforms/SLPVectorizer/AArch64/horizontal.ll b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
new file mode 100644
index 000000000000..8f8bf2648aa2
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/horizontal.ll
@@ -0,0 +1,270 @@
+; RUN: opt -slp-vectorizer -slp-threshold=-6 -S < %s | FileCheck %s
+
+; FIXME: The threshold is changed to keep this test case a bit smaller.
+; The AArch64 cost model should not give such high costs to select statements.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux"
+
+; CHECK-LABEL: test_select
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: select <4 x i1>
+define i32 @test_select(i32* noalias nocapture readonly %blk1, i32* noalias nocapture readonly %blk2, i32 %lx, i32 %h) {
+entry:
+ %cmp.22 = icmp sgt i32 %h, 0
+ br i1 %cmp.22, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %idx.ext = sext i32 %lx to i64
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %s.026 = phi i32 [ 0, %for.body.lr.ph ], [ %add27, %for.body ]
+ %j.025 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %p2.024 = phi i32* [ %blk2, %for.body.lr.ph ], [ %add.ptr29, %for.body ]
+ %p1.023 = phi i32* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %for.body ]
+ %0 = load i32, i32* %p1.023, align 4
+ %1 = load i32, i32* %p2.024, align 4
+ %sub = sub nsw i32 %0, %1
+ %cmp2 = icmp slt i32 %sub, 0
+ %sub3 = sub nsw i32 0, %sub
+ %sub3.sub = select i1 %cmp2, i32 %sub3, i32 %sub
+ %add = add nsw i32 %sub3.sub, %s.026
+ %arrayidx4 = getelementptr inbounds i32, i32* %p1.023, i64 1
+ %2 = load i32, i32* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i32, i32* %p2.024, i64 1
+ %3 = load i32, i32* %arrayidx5, align 4
+ %sub6 = sub nsw i32 %2, %3
+ %cmp7 = icmp slt i32 %sub6, 0
+ %sub9 = sub nsw i32 0, %sub6
+ %v.1 = select i1 %cmp7, i32 %sub9, i32 %sub6
+ %add11 = add nsw i32 %add, %v.1
+ %arrayidx12 = getelementptr inbounds i32, i32* %p1.023, i64 2
+ %4 = load i32, i32* %arrayidx12, align 4
+ %arrayidx13 = getelementptr inbounds i32, i32* %p2.024, i64 2
+ %5 = load i32, i32* %arrayidx13, align 4
+ %sub14 = sub nsw i32 %4, %5
+ %cmp15 = icmp slt i32 %sub14, 0
+ %sub17 = sub nsw i32 0, %sub14
+ %sub17.sub14 = select i1 %cmp15, i32 %sub17, i32 %sub14
+ %add19 = add nsw i32 %add11, %sub17.sub14
+ %arrayidx20 = getelementptr inbounds i32, i32* %p1.023, i64 3
+ %6 = load i32, i32* %arrayidx20, align 4
+ %arrayidx21 = getelementptr inbounds i32, i32* %p2.024, i64 3
+ %7 = load i32, i32* %arrayidx21, align 4
+ %sub22 = sub nsw i32 %6, %7
+ %cmp23 = icmp slt i32 %sub22, 0
+ %sub25 = sub nsw i32 0, %sub22
+ %v.3 = select i1 %cmp23, i32 %sub25, i32 %sub22
+ %add27 = add nsw i32 %add19, %v.3
+ %add.ptr = getelementptr inbounds i32, i32* %p1.023, i64 %idx.ext
+ %add.ptr29 = getelementptr inbounds i32, i32* %p2.024, i64 %idx.ext
+ %inc = add nuw nsw i32 %j.025, 1
+ %exitcond = icmp eq i32 %inc, %h
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %s.0.lcssa = phi i32 [ 0, %entry ], [ %add27, %for.end.loopexit ]
+ ret i32 %s.0.lcssa
+}
+
+;; Check whether SLP can find a reduction phi whose incoming blocks are not
+;; the same as the block containing the phi.
+;;
+;; Came from code like,
+;;
+;; int s = 0;
+;; for (int j = 0; j < h; j++) {
+;; s += p1[0] * p2[0];
+;; s += p1[1] * p2[1];
+;; s += p1[2] * p2[2];
+;; s += p1[3] * p2[3];
+;; if (s >= lim)
+;; break;
+;; p1 += lx;
+;; p2 += lx;
+;; }
+define i32 @reduction_with_br(i32* noalias nocapture readonly %blk1, i32* noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) {
+; CHECK-LABEL: reduction_with_br
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: mul nsw <4 x i32>
+entry:
+ %cmp.16 = icmp sgt i32 %h, 0
+ br i1 %cmp.16, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %idx.ext = sext i32 %lx to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %if.end
+ %s.020 = phi i32 [ 0, %for.body.lr.ph ], [ %add13, %if.end ]
+ %j.019 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ]
+ %p2.018 = phi i32* [ %blk2, %for.body.lr.ph ], [ %add.ptr16, %if.end ]
+ %p1.017 = phi i32* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %if.end ]
+ %0 = load i32, i32* %p1.017, align 4
+ %1 = load i32, i32* %p2.018, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, %s.020
+ %arrayidx2 = getelementptr inbounds i32, i32* %p1.017, i64 1
+ %2 = load i32, i32* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %p2.018, i64 1
+ %3 = load i32, i32* %arrayidx3, align 4
+ %mul4 = mul nsw i32 %3, %2
+ %add5 = add nsw i32 %add, %mul4
+ %arrayidx6 = getelementptr inbounds i32, i32* %p1.017, i64 2
+ %4 = load i32, i32* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds i32, i32* %p2.018, i64 2
+ %5 = load i32, i32* %arrayidx7, align 4
+ %mul8 = mul nsw i32 %5, %4
+ %add9 = add nsw i32 %add5, %mul8
+ %arrayidx10 = getelementptr inbounds i32, i32* %p1.017, i64 3
+ %6 = load i32, i32* %arrayidx10, align 4
+ %arrayidx11 = getelementptr inbounds i32, i32* %p2.018, i64 3
+ %7 = load i32, i32* %arrayidx11, align 4
+ %mul12 = mul nsw i32 %7, %6
+ %add13 = add nsw i32 %add9, %mul12
+ %cmp14 = icmp slt i32 %add13, %lim
+ br i1 %cmp14, label %if.end, label %for.end.loopexit
+
+if.end: ; preds = %for.body
+ %add.ptr = getelementptr inbounds i32, i32* %p1.017, i64 %idx.ext
+ %add.ptr16 = getelementptr inbounds i32, i32* %p2.018, i64 %idx.ext
+ %inc = add nuw nsw i32 %j.019, 1
+ %cmp = icmp slt i32 %inc, %h
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body, %if.end
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %s.1 = phi i32 [ 0, %entry ], [ %add13, %for.end.loopexit ]
+ ret i32 %s.1
+}
+
+; CHECK: test_unrolled_select
+; CHECK: load <8 x i8>
+; CHECK: load <8 x i8>
+; CHECK: select <8 x i1>
+define i32 @test_unrolled_select(i8* noalias nocapture readonly %blk1, i8* noalias nocapture readonly %blk2, i32 %lx, i32 %h, i32 %lim) #0 {
+entry:
+ %cmp.43 = icmp sgt i32 %h, 0
+ br i1 %cmp.43, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %idx.ext = sext i32 %lx to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %if.end.86
+ %s.047 = phi i32 [ 0, %for.body.lr.ph ], [ %add82, %if.end.86 ]
+ %j.046 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end.86 ]
+ %p2.045 = phi i8* [ %blk2, %for.body.lr.ph ], [ %add.ptr88, %if.end.86 ]
+ %p1.044 = phi i8* [ %blk1, %for.body.lr.ph ], [ %add.ptr, %if.end.86 ]
+ %0 = load i8, i8* %p1.044, align 1
+ %conv = zext i8 %0 to i32
+ %1 = load i8, i8* %p2.045, align 1
+ %conv2 = zext i8 %1 to i32
+ %sub = sub nsw i32 %conv, %conv2
+ %cmp3 = icmp slt i32 %sub, 0
+ %sub5 = sub nsw i32 0, %sub
+ %sub5.sub = select i1 %cmp3, i32 %sub5, i32 %sub
+ %add = add nsw i32 %sub5.sub, %s.047
+ %arrayidx6 = getelementptr inbounds i8, i8* %p1.044, i64 1
+ %2 = load i8, i8* %arrayidx6, align 1
+ %conv7 = zext i8 %2 to i32
+ %arrayidx8 = getelementptr inbounds i8, i8* %p2.045, i64 1
+ %3 = load i8, i8* %arrayidx8, align 1
+ %conv9 = zext i8 %3 to i32
+ %sub10 = sub nsw i32 %conv7, %conv9
+ %cmp11 = icmp slt i32 %sub10, 0
+ %sub14 = sub nsw i32 0, %sub10
+ %v.1 = select i1 %cmp11, i32 %sub14, i32 %sub10
+ %add16 = add nsw i32 %add, %v.1
+ %arrayidx17 = getelementptr inbounds i8, i8* %p1.044, i64 2
+ %4 = load i8, i8* %arrayidx17, align 1
+ %conv18 = zext i8 %4 to i32
+ %arrayidx19 = getelementptr inbounds i8, i8* %p2.045, i64 2
+ %5 = load i8, i8* %arrayidx19, align 1
+ %conv20 = zext i8 %5 to i32
+ %sub21 = sub nsw i32 %conv18, %conv20
+ %cmp22 = icmp slt i32 %sub21, 0
+ %sub25 = sub nsw i32 0, %sub21
+ %sub25.sub21 = select i1 %cmp22, i32 %sub25, i32 %sub21
+ %add27 = add nsw i32 %add16, %sub25.sub21
+ %arrayidx28 = getelementptr inbounds i8, i8* %p1.044, i64 3
+ %6 = load i8, i8* %arrayidx28, align 1
+ %conv29 = zext i8 %6 to i32
+ %arrayidx30 = getelementptr inbounds i8, i8* %p2.045, i64 3
+ %7 = load i8, i8* %arrayidx30, align 1
+ %conv31 = zext i8 %7 to i32
+ %sub32 = sub nsw i32 %conv29, %conv31
+ %cmp33 = icmp slt i32 %sub32, 0
+ %sub36 = sub nsw i32 0, %sub32
+ %v.3 = select i1 %cmp33, i32 %sub36, i32 %sub32
+ %add38 = add nsw i32 %add27, %v.3
+ %arrayidx39 = getelementptr inbounds i8, i8* %p1.044, i64 4
+ %8 = load i8, i8* %arrayidx39, align 1
+ %conv40 = zext i8 %8 to i32
+ %arrayidx41 = getelementptr inbounds i8, i8* %p2.045, i64 4
+ %9 = load i8, i8* %arrayidx41, align 1
+ %conv42 = zext i8 %9 to i32
+ %sub43 = sub nsw i32 %conv40, %conv42
+ %cmp44 = icmp slt i32 %sub43, 0
+ %sub47 = sub nsw i32 0, %sub43
+ %sub47.sub43 = select i1 %cmp44, i32 %sub47, i32 %sub43
+ %add49 = add nsw i32 %add38, %sub47.sub43
+ %arrayidx50 = getelementptr inbounds i8, i8* %p1.044, i64 5
+ %10 = load i8, i8* %arrayidx50, align 1
+ %conv51 = zext i8 %10 to i32
+ %arrayidx52 = getelementptr inbounds i8, i8* %p2.045, i64 5
+ %11 = load i8, i8* %arrayidx52, align 1
+ %conv53 = zext i8 %11 to i32
+ %sub54 = sub nsw i32 %conv51, %conv53
+ %cmp55 = icmp slt i32 %sub54, 0
+ %sub58 = sub nsw i32 0, %sub54
+ %v.5 = select i1 %cmp55, i32 %sub58, i32 %sub54
+ %add60 = add nsw i32 %add49, %v.5
+ %arrayidx61 = getelementptr inbounds i8, i8* %p1.044, i64 6
+ %12 = load i8, i8* %arrayidx61, align 1
+ %conv62 = zext i8 %12 to i32
+ %arrayidx63 = getelementptr inbounds i8, i8* %p2.045, i64 6
+ %13 = load i8, i8* %arrayidx63, align 1
+ %conv64 = zext i8 %13 to i32
+ %sub65 = sub nsw i32 %conv62, %conv64
+ %cmp66 = icmp slt i32 %sub65, 0
+ %sub69 = sub nsw i32 0, %sub65
+ %sub69.sub65 = select i1 %cmp66, i32 %sub69, i32 %sub65
+ %add71 = add nsw i32 %add60, %sub69.sub65
+ %arrayidx72 = getelementptr inbounds i8, i8* %p1.044, i64 7
+ %14 = load i8, i8* %arrayidx72, align 1
+ %conv73 = zext i8 %14 to i32
+ %arrayidx74 = getelementptr inbounds i8, i8* %p2.045, i64 7
+ %15 = load i8, i8* %arrayidx74, align 1
+ %conv75 = zext i8 %15 to i32
+ %sub76 = sub nsw i32 %conv73, %conv75
+ %cmp77 = icmp slt i32 %sub76, 0
+ %sub80 = sub nsw i32 0, %sub76
+ %v.7 = select i1 %cmp77, i32 %sub80, i32 %sub76
+ %add82 = add nsw i32 %add71, %v.7
+ %cmp83 = icmp slt i32 %add82, %lim
+ br i1 %cmp83, label %if.end.86, label %for.end.loopexit
+
+if.end.86: ; preds = %for.body
+ %add.ptr = getelementptr inbounds i8, i8* %p1.044, i64 %idx.ext
+ %add.ptr88 = getelementptr inbounds i8, i8* %p2.045, i64 %idx.ext
+ %inc = add nuw nsw i32 %j.046, 1
+ %cmp = icmp slt i32 %inc, %h
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body, %if.end.86
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %s.1 = phi i32 [ 0, %entry ], [ %add82, %for.end.loopexit ]
+ ret i32 %s.1
+}
+
diff --git a/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll b/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll
new file mode 100644
index 000000000000..87d021d534cf
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/nontemporal.ll
@@ -0,0 +1,76 @@
+; RUN: opt -S -basicaa -slp-vectorizer -dce < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+; CHECK-LABEL: @foo
+define void @foo(float* noalias %a, float* noalias %b, float* noalias %c) {
+entry:
+; Check that we don't lose !nontemporal hint when vectorizing loads.
+; CHECK: %{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4, !nontemporal !0
+ %b1 = load float, float* %b, align 4, !nontemporal !0
+ %arrayidx.1 = getelementptr inbounds float, float* %b, i64 1
+ %b2 = load float, float* %arrayidx.1, align 4, !nontemporal !0
+ %arrayidx.2 = getelementptr inbounds float, float* %b, i64 2
+ %b3 = load float, float* %arrayidx.2, align 4, !nontemporal !0
+ %arrayidx.3 = getelementptr inbounds float, float* %b, i64 3
+ %b4 = load float, float* %arrayidx.3, align 4, !nontemporal !0
+
+; Check that we don't introduce !nontemporal hint when the original scalar loads didn't have it.
+; CHECK: %{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4{{$}}
+ %c1 = load float, float* %c, align 4
+ %arrayidx2.1 = getelementptr inbounds float, float* %c, i64 1
+ %c2 = load float, float* %arrayidx2.1, align 4
+ %arrayidx2.2 = getelementptr inbounds float, float* %c, i64 2
+ %c3 = load float, float* %arrayidx2.2, align 4
+ %arrayidx2.3 = getelementptr inbounds float, float* %c, i64 3
+ %c4 = load float, float* %arrayidx2.3, align 4
+
+ %a1 = fadd float %b1, %c1
+ %a2 = fadd float %b2, %c2
+ %a3 = fadd float %b3, %c3
+ %a4 = fadd float %b4, %c4
+
+; Check that we don't lose !nontemporal hint when vectorizing stores.
+; CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 4, !nontemporal !0
+ store float %a1, float* %a, align 4, !nontemporal !0
+ %arrayidx3.1 = getelementptr inbounds float, float* %a, i64 1
+ store float %a2, float* %arrayidx3.1, align 4, !nontemporal !0
+ %arrayidx3.2 = getelementptr inbounds float, float* %a, i64 2
+ store float %a3, float* %arrayidx3.2, align 4, !nontemporal !0
+ %arrayidx3.3 = getelementptr inbounds float, float* %a, i64 3
+ store float %a4, float* %arrayidx3.3, align 4, !nontemporal !0
+
+; CHECK: ret void
+ ret void
+}
+
+; CHECK-LABEL: @foo2
+define void @foo2(float* noalias %a, float* noalias %b) {
+entry:
+; Check that we don't mark the vector load with the !nontemporal hint if some
+; of the original scalar loads don't have it.
+; CHECK: %{{[0-9]*}} = load <4 x float>, <4 x float>* %{{[0-9]+}}, align 4{{$}}
+ %b1 = load float, float* %b, align 4, !nontemporal !0
+ %arrayidx.1 = getelementptr inbounds float, float* %b, i64 1
+ %b2 = load float, float* %arrayidx.1, align 4
+ %arrayidx.2 = getelementptr inbounds float, float* %b, i64 2
+ %b3 = load float, float* %arrayidx.2, align 4
+ %arrayidx.3 = getelementptr inbounds float, float* %b, i64 3
+ %b4 = load float, float* %arrayidx.3, align 4, !nontemporal !0
+
+; Check that we don't mark the vector store with the !nontemporal hint if some
+; of the original scalar stores don't have it.
+; CHECK: store <4 x float> %{{[0-9]+}}, <4 x float>* %{{[0-9]+}}, align 4{{$}}
+ store float %b1, float* %a, align 4, !nontemporal !0
+ %arrayidx3.1 = getelementptr inbounds float, float* %a, i64 1
+ store float %b2, float* %arrayidx3.1, align 4
+ %arrayidx3.2 = getelementptr inbounds float, float* %a, i64 2
+ store float %b3, float* %arrayidx3.2, align 4
+ %arrayidx3.3 = getelementptr inbounds float, float* %a, i64 3
+ store float %b4, float* %arrayidx3.3, align 4, !nontemporal !0
+
+; CHECK: ret void
+ ret void
+}
+
+!0 = !{i32 1}
diff --git a/test/Transforms/SLPVectorizer/X86/bad_types.ll b/test/Transforms/SLPVectorizer/X86/bad_types.ll
index 2d8f3832ee29..98c29068bb96 100644
--- a/test/Transforms/SLPVectorizer/X86/bad_types.ll
+++ b/test/Transforms/SLPVectorizer/X86/bad_types.ll
@@ -47,4 +47,30 @@ exit:
ret void
}
+define i8 @test3(i8 *%addr) {
+; Check that we do not vectorize types that are padded to bigger ones.
+;
+; CHECK-LABEL: @test3
+; CHECK-NOT: <4 x i2>
+; CHECK: ret i8
+entry:
+ %a = bitcast i8* %addr to i2*
+ %a0 = getelementptr inbounds i2, i2* %a, i64 0
+ %a1 = getelementptr inbounds i2, i2* %a, i64 1
+ %a2 = getelementptr inbounds i2, i2* %a, i64 2
+ %a3 = getelementptr inbounds i2, i2* %a, i64 3
+ %l0 = load i2, i2* %a0, align 1
+ %l1 = load i2, i2* %a1, align 1
+ %l2 = load i2, i2* %a2, align 1
+ %l3 = load i2, i2* %a3, align 1
+ br label %bb1
+bb1: ; preds = %entry
+ %p0 = phi i2 [ %l0, %entry ]
+ %p1 = phi i2 [ %l1, %entry ]
+ %p2 = phi i2 [ %l2, %entry ]
+ %p3 = phi i2 [ %l3, %entry ]
+ %r = zext i2 %p2 to i8
+ ret i8 %r
+}
+
declare void @f(i64, i64)
diff --git a/test/Transforms/SLPVectorizer/X86/commutativity.ll b/test/Transforms/SLPVectorizer/X86/commutativity.ll
new file mode 100644
index 000000000000..2798ccb15e48
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/commutativity.ll
@@ -0,0 +1,78 @@
+; RUN: opt -slp-vectorizer < %s -S | FileCheck %s
+
+; Verify that the SLP vectorizer is able to figure out that commutativity
+; offers the possibility to splat/broadcast %c and thus makes it profitable
+; to vectorize this case.
+
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+@cle = external unnamed_addr global [32 x i8], align 16
+@cle32 = external unnamed_addr global [32 x i32], align 16
+
+
+; Check that we correctly detect a splat/broadcast by leveraging the
+; commutativity property of `xor`.
+
+; CHECK-LABEL: @splat
+; CHECK: store <16 x i8>
+define void @splat(i8 %a, i8 %b, i8 %c) {
+ %1 = xor i8 %c, %a
+ store i8 %1, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 0), align 16
+ %2 = xor i8 %a, %c
+ store i8 %2, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 1)
+ %3 = xor i8 %a, %c
+ store i8 %3, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 2)
+ %4 = xor i8 %a, %c
+ store i8 %4, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 3)
+ %5 = xor i8 %c, %a
+ store i8 %5, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 4)
+ %6 = xor i8 %c, %b
+ store i8 %6, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 5)
+ %7 = xor i8 %c, %a
+ store i8 %7, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 6)
+ %8 = xor i8 %c, %b
+ store i8 %8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 7)
+ %9 = xor i8 %a, %c
+ store i8 %9, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 8)
+ %10 = xor i8 %a, %c
+ store i8 %10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 9)
+ %11 = xor i8 %a, %c
+ store i8 %11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 10)
+ %12 = xor i8 %a, %c
+ store i8 %12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 11)
+ %13 = xor i8 %a, %c
+ store i8 %13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 12)
+ %14 = xor i8 %a, %c
+ store i8 %14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 13)
+ %15 = xor i8 %a, %c
+ store i8 %15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 14)
+ %16 = xor i8 %a, %c
+ store i8 %16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 15)
+ ret void
+}
+
+
+
+; Check that we correctly detect that we can have the same opcode on one side by
+; leveraging the commutativity property of `xor`.
+
+; CHECK-LABEL: @same_opcode_on_one_side
+; CHECK: store <4 x i32>
+define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
+ %add1 = add i32 %c, %a
+ %add2 = add i32 %c, %a
+ %add3 = add i32 %a, %c
+ %add4 = add i32 %c, %a
+ %1 = xor i32 %add1, %a
+ store i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 0), align 16
+ %2 = xor i32 %b, %add2
+ store i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 1)
+ %3 = xor i32 %c, %add3
+ store i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 2)
+ %4 = xor i32 %a, %add4
+ store i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 3)
+ ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/debug_info.ll b/test/Transforms/SLPVectorizer/X86/debug_info.ll
index 1d349fbc98b5..4472225811b1 100644
--- a/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -21,7 +21,7 @@ target triple = "x86_64-apple-macosx10.7.0"
;CHECK: ![[LOC]] = !DILocation(line: 4, scope:
;CHECK: ![[LOC2]] = !DILocation(line: 7, scope:
-define i32 @depth(double* nocapture %A, i32 %m) #0 {
+define i32 @depth(double* nocapture %A, i32 %m) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata double* %A, i64 0, metadata !12, metadata !DIExpression()), !dbg !19
tail call void @llvm.dbg.value(metadata i32 %m, i64 0, metadata !13, metadata !DIExpression()), !dbg !19
@@ -57,11 +57,11 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!18, !32}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "file.c", directory: "/Users/nadav")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "depth", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (double*, i32)* @depth, variables: !11)
+!4 = distinct !DISubprogram(name: "depth", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !11)
!5 = !DIFile(filename: "file.c", directory: "/Users/nadav")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9, !8}
@@ -69,11 +69,11 @@ attributes #1 = { nounwind readnone }
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !10)
!10 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!11 = !{!12, !13, !14, !15, !16}
-!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "A", line: 1, arg: 1, scope: !4, file: !5, type: !9)
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "m", line: 1, arg: 2, scope: !4, file: !5, type: !8)
-!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y0", line: 2, scope: !4, file: !5, type: !10)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y1", line: 2, scope: !4, file: !5, type: !10)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3, scope: !17, file: !5, type: !8)
+!12 = !DILocalVariable(name: "A", line: 1, arg: 1, scope: !4, file: !5, type: !9)
+!13 = !DILocalVariable(name: "m", line: 1, arg: 2, scope: !4, file: !5, type: !8)
+!14 = !DILocalVariable(name: "y0", line: 2, scope: !4, file: !5, type: !10)
+!15 = !DILocalVariable(name: "y1", line: 2, scope: !4, file: !5, type: !10)
+!16 = !DILocalVariable(name: "i", line: 3, scope: !17, file: !5, type: !8)
!17 = distinct !DILexicalBlock(line: 3, column: 0, file: !1, scope: !4)
!18 = !{i32 2, !"Dwarf Version", i32 2}
!19 = !DILocation(line: 1, scope: !4)
diff --git a/test/Transforms/SLPVectorizer/X86/horizontal.ll b/test/Transforms/SLPVectorizer/X86/horizontal.ll
index 83b2e01f04e0..62cf4c1fcfb3 100644
--- a/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -1,4 +1,4 @@
-; RUN: opt -slp-vectorizer -slp-vectorize-hor -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSTORE
+; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSTORE
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
diff --git a/test/Transforms/SLPVectorizer/X86/pr23510.ll b/test/Transforms/SLPVectorizer/X86/pr23510.ll
new file mode 100644
index 000000000000..efdb0ecd9996
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/pr23510.ll
@@ -0,0 +1,38 @@
+; PR23510
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @_Z3fooPml(
+; CHECK: lshr <2 x i64>
+; CHECK: lshr <2 x i64>
+
+@total = global i64 0, align 8
+
+define void @_Z3fooPml(i64* nocapture %a, i64 %i) {
+entry:
+ %tmp = load i64, i64* %a, align 8
+ %shr = lshr i64 %tmp, 4
+ store i64 %shr, i64* %a, align 8
+ %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 1
+ %tmp1 = load i64, i64* %arrayidx1, align 8
+ %shr2 = lshr i64 %tmp1, 4
+ store i64 %shr2, i64* %arrayidx1, align 8
+ %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 %i
+ %tmp2 = load i64, i64* %arrayidx3, align 8
+ %tmp3 = load i64, i64* @total, align 8
+ %add = add i64 %tmp3, %tmp2
+ store i64 %add, i64* @total, align 8
+ %tmp4 = load i64, i64* %a, align 8
+ %shr5 = lshr i64 %tmp4, 4
+ store i64 %shr5, i64* %a, align 8
+ %tmp5 = load i64, i64* %arrayidx1, align 8
+ %shr7 = lshr i64 %tmp5, 4
+ store i64 %shr7, i64* %arrayidx1, align 8
+ %tmp6 = load i64, i64* %arrayidx3, align 8
+ %tmp7 = load i64, i64* @total, align 8
+ %add9 = add i64 %tmp7, %tmp6
+ store i64 %add9, i64* @total, align 8
+ ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/schedule_budget.ll b/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
new file mode 100644
index 000000000000..2cb2373381c7
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/schedule_budget.ll
@@ -0,0 +1,93 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Test if the budget for the scheduling region size works.
+; We test with a reduced budget of 16 which should prevent vectorizing the loads.
+
+declare void @unknown()
+
+; CHECK-LABEL: @test
+; CHECK: load float
+; CHECK: load float
+; CHECK: load float
+; CHECK: load float
+; CHECK: call void @unknown
+; CHECK: store float
+; CHECK: store float
+; CHECK: store float
+; CHECK: store float
+; CHECK: load <4 x float>
+; CHECK: store <4 x float>
+define void @test(float * %a, float * %b, float * %c, float * %d) {
+entry:
+ ; Don't vectorize these loads.
+ %l0 = load float, float* %a
+ %a1 = getelementptr inbounds float, float* %a, i64 1
+ %l1 = load float, float* %a1
+ %a2 = getelementptr inbounds float, float* %a, i64 2
+ %l2 = load float, float* %a2
+ %a3 = getelementptr inbounds float, float* %a, i64 3
+ %l3 = load float, float* %a3
+
+ ; Some unrelated instructions in between, to enlarge the scheduling region.
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+ call void @unknown()
+
+ ; Don't vectorize these stores because their operands are too far away.
+ store float %l0, float* %b
+ %b1 = getelementptr inbounds float, float* %b, i64 1
+ store float %l1, float* %b1
+ %b2 = getelementptr inbounds float, float* %b, i64 2
+ store float %l2, float* %b2
+ %b3 = getelementptr inbounds float, float* %b, i64 3
+ store float %l3, float* %b3
+
+ ; But still vectorize the following instructions, because even if the budget
+ ; is exceeded there is a minimum region size.
+ %l4 = load float, float* %c
+ %c1 = getelementptr inbounds float, float* %c, i64 1
+ %l5 = load float, float* %c1
+ %c2 = getelementptr inbounds float, float* %c, i64 2
+ %l6 = load float, float* %c2
+ %c3 = getelementptr inbounds float, float* %c, i64 3
+ %l7 = load float, float* %c3
+
+ store float %l4, float* %d
+ %d1 = getelementptr inbounds float, float* %d, i64 1
+ store float %l5, float* %d1
+ %d2 = getelementptr inbounds float, float* %d, i64 2
+ store float %l6, float* %d2
+ %d3 = getelementptr inbounds float, float* %d, i64 3
+ store float %l7, float* %d3
+
+ ret void
+}
+
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index ad2794167a5e..7b5daa9d7823 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -sroa -S | FileCheck %s
-; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
+; RUN: opt < %s -passes=sroa -S | FileCheck %s
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
@@ -1610,3 +1610,26 @@ entry:
%load = load atomic volatile i64, i64* %ptr seq_cst, align 8
ret void
}
+
+define i16 @PR24463() {
+; Ensure we can handle a very interesting case where there is an integer-based
+; rewrite of the uses of the alloca, but where one of the integers in that is
+; a sub-integer that requires extraction *and* extends past the end of the
+; alloca. In this case, we should extract the i8 and then zext it to i16.
+;
+; CHECK-LABEL: @PR24463(
+; CHECK-NOT: alloca
+; CHECK: %[[SHIFT:.*]] = lshr i16 0, 8
+; CHECK: %[[TRUNC:.*]] = trunc i16 %[[SHIFT]] to i8
+; CHECK: %[[ZEXT:.*]] = zext i8 %[[TRUNC]] to i16
+; CHECK: ret i16 %[[ZEXT]]
+entry:
+ %alloca = alloca [3 x i8]
+ %gep1 = getelementptr inbounds [3 x i8], [3 x i8]* %alloca, i64 0, i64 1
+ %bc1 = bitcast i8* %gep1 to i16*
+ store i16 0, i16* %bc1
+ %gep2 = getelementptr inbounds [3 x i8], [3 x i8]* %alloca, i64 0, i64 2
+ %bc2 = bitcast i8* %gep2 to i16*
+ %load = load i16, i16* %bc2
+ ret i16 %load
+}
diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll
index 4de7bfcb898d..ea41a20fd38e 100644
--- a/test/Transforms/SROA/big-endian.ll
+++ b/test/Transforms/SROA/big-endian.ll
@@ -1,5 +1,4 @@
; RUN: opt < %s -sroa -S | FileCheck %s
-; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
diff --git a/test/Transforms/SROA/fca.ll b/test/Transforms/SROA/fca.ll
index 6eaa73f53189..707f680e64e8 100644
--- a/test/Transforms/SROA/fca.ll
+++ b/test/Transforms/SROA/fca.ll
@@ -1,5 +1,4 @@
; RUN: opt < %s -sroa -S | FileCheck %s
-; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
define { i32, i32 } @test0(i32 %x, i32 %y) {
diff --git a/test/Transforms/SafeStack/AArch64/abi.ll b/test/Transforms/SafeStack/AArch64/abi.ll
new file mode 100644
index 000000000000..cdec923eb74c
--- /dev/null
+++ b/test/Transforms/SafeStack/AArch64/abi.ll
@@ -0,0 +1,20 @@
+; RUN: opt -safe-stack -S -mtriple=aarch64-linux-android < %s -o - | FileCheck %s
+
+
+define void @foo() nounwind uwtable safestack {
+entry:
+; CHECK: %[[TP:.*]] = call i8* @llvm.aarch64.thread.pointer()
+; CHECK: %[[SPA0:.*]] = getelementptr i8, i8* %[[TP]], i32 72
+; CHECK: %[[SPA:.*]] = bitcast i8* %[[SPA0]] to i8**
+; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]]
+; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK: store i8* %[[USST]], i8** %[[SPA]]
+
+ %a = alloca i8, align 8
+ call void @Capture(i8* %a)
+
+; CHECK: store i8* %[[USP]], i8** %[[SPA]]
+ ret void
+}
+
+declare void @Capture(i8*)
diff --git a/test/Transforms/SafeStack/AArch64/lit.local.cfg b/test/Transforms/SafeStack/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..cec29af5bbe4
--- /dev/null
+++ b/test/Transforms/SafeStack/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AArch64' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/SafeStack/ARM/abi.ll b/test/Transforms/SafeStack/ARM/abi.ll
new file mode 100644
index 000000000000..e33c913ae916
--- /dev/null
+++ b/test/Transforms/SafeStack/ARM/abi.ll
@@ -0,0 +1,18 @@
+; RUN: opt -safe-stack -S -mtriple=arm-linux-android < %s -o - | FileCheck %s
+
+
+define void @foo() nounwind uwtable safestack {
+entry:
+; CHECK: %[[SPA:.*]] = call i8** @__safestack_pointer_address()
+; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]]
+; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK: store i8* %[[USST]], i8** %[[SPA]]
+
+ %a = alloca i8, align 8
+ call void @Capture(i8* %a)
+
+; CHECK: store i8* %[[USP]], i8** %[[SPA]]
+ ret void
+}
+
+declare void @Capture(i8*)
diff --git a/test/Transforms/SafeStack/ARM/lit.local.cfg b/test/Transforms/SafeStack/ARM/lit.local.cfg
new file mode 100644
index 000000000000..98c6700c209d
--- /dev/null
+++ b/test/Transforms/SafeStack/ARM/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'ARM' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/SafeStack/ARM/setjmp.ll b/test/Transforms/SafeStack/ARM/setjmp.ll
new file mode 100644
index 000000000000..8c57908bbe4b
--- /dev/null
+++ b/test/Transforms/SafeStack/ARM/setjmp.ll
@@ -0,0 +1,34 @@
+; Test stack pointer restore after setjmp() with the function-call safestack ABI.
+; RUN: opt -safe-stack -S -mtriple=arm-linux-androideabi < %s -o - | FileCheck %s
+
+@env = global [64 x i32] zeroinitializer, align 4
+
+define void @f(i32 %b) safestack {
+entry:
+; CHECK: %[[SPA:.*]] = call i8** @__safestack_pointer_address()
+; CHECK: %[[USDP:.*]] = alloca i8*
+; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]]
+; CHECK: store i8* %[[USP]], i8** %[[USDP]]
+; CHECK: call i32 @setjmp
+
+ %call = call i32 @setjmp(i32* getelementptr inbounds ([64 x i32], [64 x i32]* @env, i32 0, i32 0)) returns_twice
+
+; CHECK: %[[USP2:.*]] = load i8*, i8** %[[USDP]]
+; CHECK: store i8* %[[USP2]], i8** %[[SPA]]
+
+ %tobool = icmp eq i32 %b, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then:
+ %0 = alloca [42 x i8], align 1
+ %.sub = getelementptr inbounds [42 x i8], [42 x i8]* %0, i32 0, i32 0
+ call void @_Z7CapturePv(i8* %.sub)
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+declare i32 @setjmp(i32*) returns_twice
+
+declare void @_Z7CapturePv(i8*)
diff --git a/test/Transforms/SafeStack/X86/abi.ll b/test/Transforms/SafeStack/X86/abi.ll
new file mode 100644
index 000000000000..f437c4f7157d
--- /dev/null
+++ b/test/Transforms/SafeStack/X86/abi.ll
@@ -0,0 +1,30 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s --check-prefix=TLS
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s --check-prefix=TLS
+; RUN: opt -safe-stack -S -mtriple=i686-linux-android < %s -o - | FileCheck %s --check-prefix=DIRECT-TLS32
+; RUN: opt -safe-stack -S -mtriple=x86_64-linux-android < %s -o - | FileCheck %s --check-prefix=DIRECT-TLS64
+
+
+define void @foo() nounwind uwtable safestack {
+entry:
+; TLS: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; TLS: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; TLS: store i8* %[[USST]], i8** @__safestack_unsafe_stack_ptr
+
+; DIRECT-TLS32: %[[USP:.*]] = load i8*, i8* addrspace(256)* inttoptr (i32 36 to i8* addrspace(256)*)
+; DIRECT-TLS32: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; DIRECT-TLS32: store i8* %[[USST]], i8* addrspace(256)* inttoptr (i32 36 to i8* addrspace(256)*)
+
+; DIRECT-TLS64: %[[USP:.*]] = load i8*, i8* addrspace(257)* inttoptr (i32 72 to i8* addrspace(257)*)
+; DIRECT-TLS64: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; DIRECT-TLS64: store i8* %[[USST]], i8* addrspace(257)* inttoptr (i32 72 to i8* addrspace(257)*)
+
+ %a = alloca i8, align 8
+ call void @Capture(i8* %a)
+
+; TLS: store i8* %[[USP]], i8** @__safestack_unsafe_stack_ptr
+; DIRECT-TLS32: store i8* %[[USP]], i8* addrspace(256)* inttoptr (i32 36 to i8* addrspace(256)*)
+; DIRECT-TLS64: store i8* %[[USP]], i8* addrspace(257)* inttoptr (i32 72 to i8* addrspace(257)*)
+ ret void
+}
+
+declare void @Capture(i8*)
diff --git a/test/Transforms/SafeStack/X86/lit.local.cfg b/test/Transforms/SafeStack/X86/lit.local.cfg
new file mode 100644
index 000000000000..e71f3cc4c41e
--- /dev/null
+++ b/test/Transforms/SafeStack/X86/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'X86' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/SafeStack/array.ll b/test/Transforms/SafeStack/array.ll
index 6036bfc2c9c5..564213e6d58f 100644
--- a/test/Transforms/SafeStack/array.ll
+++ b/test/Transforms/SafeStack/array.ll
@@ -1,9 +1,14 @@
; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -safe-stack-usp-storage=single-thread -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck -check-prefix=SINGLE-THREAD %s
; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -safe-stack-usp-storage=single-thread -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck -check-prefix=SINGLE-THREAD %s
; array [4 x i8]
; Requires protector.
+; CHECK: @__safestack_unsafe_stack_ptr = external thread_local(initialexec) global i8*
+; SINGLE-THREAD: @__safestack_unsafe_stack_ptr = external global i8*
+
define void @foo(i8* %a) nounwind uwtable safestack {
entry:
; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
@@ -35,4 +40,52 @@ entry:
ret void
}
+; Load from an array at a fixed offset, no overflow.
+define i8 @StaticArrayFixedSafe() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: define i8 @StaticArrayFixedSafe(
+ ; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i8
+ %buf = alloca i8, i32 4, align 1
+ %gep = getelementptr inbounds i8, i8* %buf, i32 2
+ %x = load i8, i8* %gep, align 1
+ ret i8 %x
+}
+
+; Load from an array at a fixed offset with overflow.
+define i8 @StaticArrayFixedUnsafe() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: define i8 @StaticArrayFixedUnsafe(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i8
+ %buf = alloca i8, i32 4, align 1
+ %gep = getelementptr inbounds i8, i8* %buf, i32 5
+ %x = load i8, i8* %gep, align 1
+ ret i8 %x
+}
+
+; Load from an array at an unknown offset.
+define i8 @StaticArrayVariableUnsafe(i32 %ofs) nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: define i8 @StaticArrayVariableUnsafe(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i8
+ %buf = alloca i8, i32 4, align 1
+ %gep = getelementptr inbounds i8, i8* %buf, i32 %ofs
+ %x = load i8, i8* %gep, align 1
+ ret i8 %x
+}
+
+; Load from an array of an unknown size.
+define i8 @DynamicArrayUnsafe(i32 %sz) nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: define i8 @DynamicArrayUnsafe(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i8
+ %buf = alloca i8, i32 %sz, align 1
+ %gep = getelementptr inbounds i8, i8* %buf, i32 2
+ %x = load i8, i8* %gep, align 1
+ ret i8 %x
+}
+
declare i8* @strcpy(i8*, i8*)
diff --git a/test/Transforms/SafeStack/byval.ll b/test/Transforms/SafeStack/byval.ll
new file mode 100644
index 000000000000..f9a06e54d2df
--- /dev/null
+++ b/test/Transforms/SafeStack/byval.ll
@@ -0,0 +1,51 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.S = type { [100 x i32] }
+
+; Safe access to a byval argument.
+define i32 @ByValSafe(%struct.S* byval nocapture readonly align 8 %zzz) norecurse nounwind readonly safestack uwtable {
+entry:
+ ; CHECK-LABEL: @ByValSafe
+ ; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i32
+ %arrayidx = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 3
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; Unsafe access to a byval argument.
+; Argument is copied to the unsafe stack.
+define i32 @ByValUnsafe(%struct.S* byval nocapture readonly align 8 %zzz, i64 %idx) norecurse nounwind readonly safestack uwtable {
+entry:
+ ; CHECK-LABEL: @ByValUnsafe
+ ; CHECK: %[[A:.*]] = load {{.*}} @__safestack_unsafe_stack_ptr
+ ; CHECK: store {{.*}} @__safestack_unsafe_stack_ptr
+ ; CHECK: %[[B:.*]] = getelementptr i8, i8* %[[A]], i32 -400
+ ; CHECK: %[[C:.*]] = bitcast %struct.S* %zzz to i8*
+ ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[B]], i8* %[[C]], i64 400, i32 8, i1 false)
+ ; CHECK: ret i32
+ %arrayidx = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 %idx
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; Highly aligned byval argument.
+define i32 @ByValUnsafeAligned(%struct.S* byval nocapture readonly align 64 %zzz, i64 %idx) norecurse nounwind readonly safestack uwtable {
+entry:
+ ; CHECK-LABEL: @ByValUnsafeAligned
+ ; CHECK: %[[A:.*]] = load {{.*}} @__safestack_unsafe_stack_ptr
+ ; CHECK: %[[B:.*]] = ptrtoint i8* %[[A]] to i64
+ ; CHECK: and i64 %[[B]], -64
+ ; CHECK: ret i32
+ %arrayidx = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 0
+ %0 = load i32, i32* %arrayidx, align 64
+ %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %zzz, i64 0, i32 0, i64 %idx
+ %1 = load i32, i32* %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ ret i32 %add
+}
+
diff --git a/test/Transforms/SafeStack/call.ll b/test/Transforms/SafeStack/call.ll
index ac12ec02b0b1..cbac4ce1bb0d 100644
--- a/test/Transforms/SafeStack/call.ll
+++ b/test/Transforms/SafeStack/call.ll
@@ -6,10 +6,11 @@
; no arrays / no nested arrays
; Requires no protector.
-; CHECK-LABEL: @foo(
define void @foo(i8* %a) nounwind uwtable safestack {
entry:
+ ; CHECK-LABEL: define void @foo(
; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
%a.addr = alloca i8*, align 8
store i8* %a, i8** %a.addr, align 8
%0 = load i8*, i8** %a.addr, align 8
@@ -18,3 +19,160 @@ entry:
}
declare i32 @printf(i8*, ...)
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @call_memset(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_memset
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 %len, i32 1, i1 false)
+ ret void
+}
+
+define void @call_constant_memset() safestack {
+entry:
+ ; CHECK-LABEL: define void @call_constant_memset
+ ; CHECK-NOT: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 2
+ call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 7, i32 1, i1 false)
+ ret void
+}
+
+define void @call_constant_overflow_memset() safestack {
+entry:
+ ; CHECK-LABEL: define void @call_constant_overflow_memset
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 7
+ call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 5, i32 1, i1 false)
+ ret void
+}
+
+define void @call_constant_underflow_memset() safestack {
+entry:
+ ; CHECK-LABEL: define void @call_constant_underflow_memset
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr [10 x i8], [10 x i8]* %q, i32 0, i32 -1
+ call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 3, i32 1, i1 false)
+ ret void
+}
+
+; Readnone nocapture -> safe
+define void @call_readnone(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_readnone
+ ; CHECK-NOT: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @readnone(i8* %arraydecay)
+ ret void
+}
+
+; Arg0 is readnone, arg1 is not. Pass alloca ptr as arg0 -> safe
+define void @call_readnone0_0(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_readnone0_0
+ ; CHECK-NOT: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @readnone0(i8* %arraydecay, i8* zeroinitializer)
+ ret void
+}
+
+; Arg0 is readnone, arg1 is not. Pass alloca ptr as arg1 -> unsafe
+define void @call_readnone0_1(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_readnone0_1
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @readnone0(i8 *zeroinitializer, i8* %arraydecay)
+ ret void
+}
+
+; Readonly function, nocapture argument -> unsafe
+define void @call_readonly(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_readonly
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @readonly(i8* %arraydecay)
+ ret void
+}
+
+; Readonly nocapture argument (attribute on the parameter, not the function) -> unsafe
+define void @call_arg_readonly(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_arg_readonly
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @arg_readonly(i8* %arraydecay)
+ ret void
+}
+
+; Readwrite nocapture -> unsafe
+define void @call_readwrite(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_readwrite
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @readwrite(i8* %arraydecay)
+ ret void
+}
+
+; Captures the argument -> unsafe
+define void @call_capture(i64 %len) safestack {
+entry:
+ ; CHECK-LABEL: define void @call_capture
+ ; CHECK: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %q = alloca [10 x i8], align 1
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q, i32 0, i32 0
+ call void @capture(i8* %arraydecay)
+ ret void
+}
+
+; Lifetime intrinsics are always safe. The function is marked safestack so the CHECK-NOT below is meaningful.
+define void @call_lifetime(i32* %p) safestack {
+ ; CHECK-LABEL: define void @call_lifetime
+ ; CHECK-NOT: @__safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+entry:
+ %q = alloca [100 x i8], align 16
+ %0 = bitcast [100 x i8]* %q to i8*
+ call void @llvm.lifetime.start(i64 100, i8* %0)
+ call void @llvm.lifetime.end(i64 100, i8* %0)
+ ret void
+}
+
+declare void @readonly(i8* nocapture) readonly
+declare void @arg_readonly(i8* readonly nocapture)
+declare void @readwrite(i8* nocapture)
+declare void @capture(i8* readnone) readnone
+
+declare void @readnone(i8* nocapture) readnone
+declare void @readnone0(i8* nocapture readnone, i8* nocapture)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind argmemonly
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind argmemonly
diff --git a/test/Transforms/SafeStack/cast.ll b/test/Transforms/SafeStack/cast.ll
index df6273a117c3..23f525d5e0b1 100644
--- a/test/Transforms/SafeStack/cast.ll
+++ b/test/Transforms/SafeStack/cast.ll
@@ -4,14 +4,36 @@
@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
; PtrToInt/IntToPtr Cast
-; Requires no protector.
-; CHECK-LABEL: @foo(
-define void @foo() nounwind uwtable safestack {
+define void @IntToPtr() nounwind uwtable safestack {
entry:
+ ; CHECK-LABEL: @IntToPtr(
; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
%a = alloca i32, align 4
%0 = ptrtoint i32* %a to i64
%1 = inttoptr i64 %0 to i32*
ret void
}
+
+define i8 @BitCastNarrow() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @BitCastNarrow(
+ ; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i8
+ %a = alloca i32, align 4
+ %0 = bitcast i32* %a to i8*
+ %1 = load i8, i8* %0, align 1
+ ret i8 %1
+}
+
+define i64 @BitCastWide() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @BitCastWide(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i64
+ %a = alloca i32, align 4
+ %0 = bitcast i32* %a to i64*
+ %1 = load i64, i64* %0, align 1
+ ret i64 %1
+}
diff --git a/test/Transforms/SafeStack/debug-loc.ll b/test/Transforms/SafeStack/debug-loc.ll
new file mode 100644
index 000000000000..e72d0e9d2ff2
--- /dev/null
+++ b/test/Transforms/SafeStack/debug-loc.ll
@@ -0,0 +1,83 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+
+; Test debug location for the local variables moved onto the unsafe stack.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.S = type { [100 x i8] }
+
+; Function Attrs: safestack uwtable
+define void @f(%struct.S* byval align 8 %zzz) #0 !dbg !12 {
+; CHECK: define void @f
+
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+
+ %xxx = alloca %struct.S, align 1
+ call void @llvm.dbg.declare(metadata %struct.S* %zzz, metadata !18, metadata !19), !dbg !20
+ call void @llvm.dbg.declare(metadata %struct.S* %xxx, metadata !21, metadata !19), !dbg !22
+
+; dbg.declare for %zzz and %xxx are gone; replaced with dbg.declare based off the unsafe stack pointer
+; CHECK-NOT: call void @llvm.dbg.declare
+; CHECK: call void @llvm.dbg.declare(metadata i8* %[[USP]], metadata ![[VAR_ARG:.*]], metadata ![[EXPR_ARG:.*]])
+; CHECK-NOT: call void @llvm.dbg.declare
+; CHECK: call void @llvm.dbg.declare(metadata i8* %[[USP]], metadata ![[VAR_LOCAL:.*]], metadata ![[EXPR_LOCAL:.*]])
+; CHECK-NOT: call void @llvm.dbg.declare
+
+ call void @Capture(%struct.S* %zzz), !dbg !23
+ call void @Capture(%struct.S* %xxx), !dbg !24
+
+; dbg.declare appears before the first use
+; CHECK: call void @Capture
+; CHECK: call void @Capture
+
+ ret void, !dbg !25
+}
+
+; CHECK-DAG: ![[VAR_ARG]] = !DILocalVariable(name: "zzz"
+; 100 aligned up to 8
+; CHECK-DAG: ![[EXPR_ARG]] = !DIExpression(DW_OP_deref, DW_OP_minus, 104
+
+; CHECK-DAG: ![[VAR_LOCAL]] = !DILocalVariable(name: "xxx"
+; CHECK-DAG: ![[EXPR_LOCAL]] = !DIExpression(DW_OP_deref, DW_OP_minus, 208
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @Capture(%struct.S*) #2
+
+attributes #0 = { safestack uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15, !16}
+!llvm.ident = !{!17}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 254019) (llvm/trunk 254036)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !11)
+!1 = !DIFile(filename: "../llvm/2.cc", directory: "/code/build-llvm")
+!2 = !{}
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !1, line: 4, size: 800, align: 8, elements: !5, identifier: "_ZTS1S")
+!5 = !{!6}
+!6 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !"_ZTS1S", file: !1, line: 5, baseType: !7, size: 800, align: 8)
+!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 800, align: 8, elements: !9)
+!8 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!9 = !{!10}
+!10 = !DISubrange(count: 100)
+!11 = !{!12}
+!12 = distinct !DISubprogram(name: "f", linkageName: "_Z1f1S", scope: !1, file: !1, line: 10, type: !13, isLocal: false, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!13 = !DISubroutineType(types: !14)
+!14 = !{null, !"_ZTS1S"}
+!15 = !{i32 2, !"Dwarf Version", i32 4}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{!"clang version 3.8.0 (trunk 254019) (llvm/trunk 254036)"}
+!18 = !DILocalVariable(name: "zzz", arg: 1, scope: !12, file: !1, line: 10, type: !"_ZTS1S")
+!19 = !DIExpression()
+!20 = !DILocation(line: 10, column: 10, scope: !12)
+!21 = !DILocalVariable(name: "xxx", scope: !12, file: !1, line: 11, type: !"_ZTS1S")
+!22 = !DILocation(line: 11, column: 5, scope: !12)
+!23 = !DILocation(line: 12, column: 3, scope: !12)
+!24 = !DILocation(line: 13, column: 3, scope: !12)
+!25 = !DILocation(line: 14, column: 1, scope: !12)
diff --git a/test/Transforms/SafeStack/ret.ll b/test/Transforms/SafeStack/ret.ll
new file mode 100644
index 000000000000..b2b8e5665297
--- /dev/null
+++ b/test/Transforms/SafeStack/ret.ll
@@ -0,0 +1,17 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+; Returns an alloca address.
+; Requires protector.
+
+define i64 @foo() nounwind readnone safestack {
+entry:
+ ; CHECK-LABEL: define i64 @foo(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret i64
+ %x = alloca [100 x i32], align 16
+ %0 = ptrtoint [100 x i32]* %x to i64
+ ret i64 %0
+}
diff --git a/test/Transforms/SafeStack/setjmp2.ll b/test/Transforms/SafeStack/setjmp2.ll
index 65fd920d63da..bb15d7e03ace 100644
--- a/test/Transforms/SafeStack/setjmp2.ll
+++ b/test/Transforms/SafeStack/setjmp2.ll
@@ -25,7 +25,7 @@ entry:
; CHECK-NEXT: %[[INTTOPTR:.*]] = inttoptr i64 %[[AND]] to i8*
; CHECK-NEXT: store i8* %[[INTTOPTR]], i8** @__safestack_unsafe_stack_ptr
; CHECK-NEXT: store i8* %[[INTTOPTR]], i8** %unsafe_stack_dynamic_ptr
- ; CHECK-NEXT: %[[ALLOCA:.*]] = inttoptr i64 %[[SUB]] to i32*
+ ; CHECK-NEXT: %[[ALLOCA:.*]] = bitcast i8* %[[INTTOPTR]] to i32*
%a = alloca i32, i32 %size
; CHECK: setjmp
diff --git a/test/Transforms/SafeStack/store.ll b/test/Transforms/SafeStack/store.ll
new file mode 100644
index 000000000000..f493dd038bb8
--- /dev/null
+++ b/test/Transforms/SafeStack/store.ll
@@ -0,0 +1,63 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+@.str = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+
+define void @bad_store() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @bad_store(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %a = alloca i32, align 4
+ %0 = ptrtoint i32* %a to i64
+ %1 = inttoptr i64 %0 to i64*
+ store i64 zeroinitializer, i64* %1
+ ret void
+}
+
+define void @good_store() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @good_store(
+ ; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %a = alloca i32, align 4
+ %0 = bitcast i32* %a to i8*
+ store i8 zeroinitializer, i8* %0
+ ret void
+}
+
+define void @overflow_gep_store() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @overflow_gep_store(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %a = alloca i32, align 4
+ %0 = bitcast i32* %a to i8*
+ %1 = getelementptr i8, i8* %0, i32 4
+ store i8 zeroinitializer, i8* %1
+ ret void
+}
+
+define void @underflow_gep_store() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @underflow_gep_store(
+ ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %a = alloca i32, align 4
+ %0 = bitcast i32* %a to i8*
+ %1 = getelementptr i8, i8* %0, i32 -1
+ store i8 zeroinitializer, i8* %1
+ ret void
+}
+
+define void @good_gep_store() nounwind uwtable safestack {
+entry:
+ ; CHECK-LABEL: @good_gep_store(
+ ; CHECK-NOT: __safestack_unsafe_stack_ptr
+ ; CHECK: ret void
+ %a = alloca i32, align 4
+ %0 = bitcast i32* %a to i8*
+ %1 = getelementptr i8, i8* %0, i32 3
+ store i8 zeroinitializer, i8* %1
+ ret void
+}
diff --git a/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof b/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof
index cc7f0d4f2773..30e26cc81841 100644
--- a/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof
+++ b/test/Transforms/SampleProfile/Inputs/bad_discriminator_value.prof
@@ -1,2 +1,2 @@
empty:100:0
-1.-3: 10
+ 1.-3: 10
diff --git a/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof b/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof
index abcb0ba38415..62227746655e 100644
--- a/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof
+++ b/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof
@@ -1,3 +1,3 @@
3empty:100:BAD
-0: 0
-1: 100
+ 0: 0
+ 1: 100
diff --git a/test/Transforms/SampleProfile/Inputs/bad_mangle.prof b/test/Transforms/SampleProfile/Inputs/bad_mangle.prof
index 50fe86119b71..33b4c42cab44 100644
--- a/test/Transforms/SampleProfile/Inputs/bad_mangle.prof
+++ b/test/Transforms/SampleProfile/Inputs/bad_mangle.prof
@@ -1,3 +1,3 @@
double convert<std::string, float>(float):2909472:181842
-0: 181842
-1: 181842
+ 0: 181842
+ 1: 181842
diff --git a/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof b/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof
index 038c45f77e30..608affa3ff94 100644
--- a/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof
+++ b/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof
@@ -1,3 +1,3 @@
empty:100:0
-0: 0
-1: BAD
+ 0: 0
+ 1: BAD
diff --git a/test/Transforms/SampleProfile/Inputs/bad_samples.prof b/test/Transforms/SampleProfile/Inputs/bad_samples.prof
index a121d8c1ac40..bce7db9708d2 100644
--- a/test/Transforms/SampleProfile/Inputs/bad_samples.prof
+++ b/test/Transforms/SampleProfile/Inputs/bad_samples.prof
@@ -1,2 +1,2 @@
empty:100:0
-1.3: -10
+ 1.3: -10
diff --git a/test/Transforms/SampleProfile/Inputs/branch.prof b/test/Transforms/SampleProfile/Inputs/branch.prof
index cd1cb5b1f16b..ac958e325c40 100644
--- a/test/Transforms/SampleProfile/Inputs/branch.prof
+++ b/test/Transforms/SampleProfile/Inputs/branch.prof
@@ -1,8 +1,10 @@
main:15680:0
-0: 0
-4: 0
-7: 0
-9: 10226
-10: 2243
-16: 0
-18: 0
+ 1: 2500
+ 4: 1000
+ 5: 1000
+ 6: 800
+ 7: 500
+ 9: 10226
+ 10: 2243
+ 16: 0
+ 18: 0
diff --git a/test/Transforms/SampleProfile/Inputs/calls.prof b/test/Transforms/SampleProfile/Inputs/calls.prof
index 57d3887dfb65..be64a1ead428 100644
--- a/test/Transforms/SampleProfile/Inputs/calls.prof
+++ b/test/Transforms/SampleProfile/Inputs/calls.prof
@@ -1,10 +1,10 @@
_Z3sumii:105580:5279
-0: 5279
-1: 5279
-2: 5279
+ 0: 5279
+ 1: 5279
+ 2: 5279
main:225715:0
-2.1: 5553
-3: 5391
-# This indicates that at line 3 of this function, the 'then' branch
-# of the conditional is taken (discriminator '1').
-3.1: 5752 _Z3sumii:5860
+ 2.1: 5553
+ 3: 5391
+ # This indicates that at line 3 of this function, the 'then' branch
+ # of the conditional is taken (discriminator '1').
+ 3.1: 5752 _Z3sumii:5860
diff --git a/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof b/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof
new file mode 100644
index 000000000000..528e42ca3880
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof
@@ -0,0 +1,10 @@
+main:20111403:0
+ 2.1: 404065
+ 3: 443089
+ 3.1: 0
+ 4: 404066
+ 6: 0
+ 7: 0
+ 3.1: _Z12never_calledi:0
+ 0: 0
+ 1: 0
diff --git a/test/Transforms/SampleProfile/Inputs/coverage-warning.prof b/test/Transforms/SampleProfile/Inputs/coverage-warning.prof
new file mode 100644
index 000000000000..57989b837a0f
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/coverage-warning.prof
@@ -0,0 +1,5 @@
+foo:30000:100
+ 2: 28000
+ 3: 1000
+# This profile is stale. Function foo() does not have a line 8 anymore.
+ 8: 1700
diff --git a/test/Transforms/SampleProfile/Inputs/discriminator.prof b/test/Transforms/SampleProfile/Inputs/discriminator.prof
index a6bcbc511a16..0c2561d725c3 100644
--- a/test/Transforms/SampleProfile/Inputs/discriminator.prof
+++ b/test/Transforms/SampleProfile/Inputs/discriminator.prof
@@ -1,8 +1,8 @@
foo:1000:0
-1: 1
-2: 1
-2.1: 100
-3: 100
-3.1: 5
-4: 100
-5: 1
+ 1: 1
+ 2: 1
+ 2.1: 100
+ 3: 100
+ 3.1: 5
+ 4: 100
+ 5: 1
diff --git a/test/Transforms/SampleProfile/Inputs/entry_counts.prof b/test/Transforms/SampleProfile/Inputs/entry_counts.prof
index 5c2172b5a4d3..95addc9f7a11 100644
--- a/test/Transforms/SampleProfile/Inputs/entry_counts.prof
+++ b/test/Transforms/SampleProfile/Inputs/entry_counts.prof
@@ -1,3 +1,3 @@
empty:100:13293
-0: 0
-1: 100
+ 0: 0
+ 1: 100
diff --git a/test/Transforms/SampleProfile/Inputs/fnptr.binprof b/test/Transforms/SampleProfile/Inputs/fnptr.binprof
index 14d7fd555dae..a074f53db945 100644
--- a/test/Transforms/SampleProfile/Inputs/fnptr.binprof
+++ b/test/Transforms/SampleProfile/Inputs/fnptr.binprof
Binary files differ
diff --git a/test/Transforms/SampleProfile/Inputs/fnptr.prof b/test/Transforms/SampleProfile/Inputs/fnptr.prof
index 6a3b4e2315bb..2491c427393b 100644
--- a/test/Transforms/SampleProfile/Inputs/fnptr.prof
+++ b/test/Transforms/SampleProfile/Inputs/fnptr.prof
@@ -1,12 +1,12 @@
_Z3fooi:7711:610
-1: 610
+ 1: 610
_Z3bari:20301:1437
-1: 1437
+ 1: 1437
main:184019:0
-4: 534
-6: 2080
-9: 2064 _Z3bari:1471 _Z3fooi:631
-5.1: 1075
-5: 1075
-7: 534
-4.2: 534
+ 4: 534
+ 6: 2080
+ 9: 2064 _Z3bari:1471 _Z3fooi:631
+ 5.1: 1075
+ 5: 1075
+ 7: 534
+ 4.2: 534
diff --git a/test/Transforms/SampleProfile/Inputs/gcc-simple.afdo b/test/Transforms/SampleProfile/Inputs/gcc-simple.afdo
new file mode 100644
index 000000000000..93f22ce30533
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/gcc-simple.afdo
Binary files differ
diff --git a/test/Transforms/SampleProfile/Inputs/inline-coverage.prof b/test/Transforms/SampleProfile/Inputs/inline-coverage.prof
new file mode 100644
index 000000000000..3d792733149a
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/inline-coverage.prof
@@ -0,0 +1,7 @@
+main:501438:0
+ 2.1: 23478
+ 3: 23478
+ 4: 0
+ 0: 0
+ 3: _Z3fool:172746
+ 1: 31878
diff --git a/test/Transforms/SampleProfile/Inputs/inline-hint.prof b/test/Transforms/SampleProfile/Inputs/inline-hint.prof
new file mode 100644
index 000000000000..a6840346eb43
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/inline-hint.prof
@@ -0,0 +1,3 @@
+_Z6hot_fnRxi:700:0
+_Z7cold_fnRxi:1:0
+other:299:0
diff --git a/test/Transforms/SampleProfile/Inputs/inline.prof b/test/Transforms/SampleProfile/Inputs/inline.prof
new file mode 100644
index 000000000000..386cdf8a7b5e
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/inline.prof
@@ -0,0 +1,7 @@
+main:225715:0
+ 2.1: 5553
+ 3: 5391
+ 3.1: _Z3sumii:5860
+ 0: 5279
+ 1: 5279
+ 2: 5279
diff --git a/test/Transforms/SampleProfile/Inputs/nolocinfo.prof b/test/Transforms/SampleProfile/Inputs/nolocinfo.prof
new file mode 100644
index 000000000000..fc69aa8ae783
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/nolocinfo.prof
@@ -0,0 +1,3 @@
+foo:30000:100
+ 2: 28000
+ 3: 1000
diff --git a/test/Transforms/SampleProfile/Inputs/offset.prof b/test/Transforms/SampleProfile/Inputs/offset.prof
new file mode 100644
index 000000000000..b07ce3504fb2
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/offset.prof
@@ -0,0 +1,4 @@
+_Z3fooi:300:1
+ 65532: 1000
+ 65533: 10
+ 65535: 990
diff --git a/test/Transforms/SampleProfile/Inputs/propagate.prof b/test/Transforms/SampleProfile/Inputs/propagate.prof
index b28609be66c1..ee9c6d62dfd1 100644
--- a/test/Transforms/SampleProfile/Inputs/propagate.prof
+++ b/test/Transforms/SampleProfile/Inputs/propagate.prof
@@ -1,17 +1,17 @@
_Z3fooiil:58139:0
-0: 0
-1: 0
-2: 0
-4: 1
-5: 10
-6: 0
-7: 5
-8: 3
-9: 0
-10: 0
-11: 6339
-12: 16191
-13: 8141
-16: 1
-18: 0
-19: 0
+ 0: 0
+ 1: 0
+ 2: 0
+ 4: 1
+ 5: 10
+ 6: 2
+ 7: 5
+ 8: 3
+ 9: 0
+ 10: 0
+ 11: 6339
+ 12: 16191
+ 13: 8141
+ 16: 1
+ 18: 0
+ 19: 0
diff --git a/test/Transforms/SampleProfile/Inputs/remarks.prof b/test/Transforms/SampleProfile/Inputs/remarks.prof
new file mode 100644
index 000000000000..1e905834cf41
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/remarks.prof
@@ -0,0 +1,7 @@
+main:623868:0
+ 0: 0
+ 0: _Z3foov:623868
+ 3: 18346
+ 4: 0
+ 6: 19475
+ 2: 18305
diff --git a/test/Transforms/SampleProfile/Inputs/syntax.prof b/test/Transforms/SampleProfile/Inputs/syntax.prof
index f3738912a9dc..465212d86e84 100644
--- a/test/Transforms/SampleProfile/Inputs/syntax.prof
+++ b/test/Transforms/SampleProfile/Inputs/syntax.prof
@@ -1,3 +1,3 @@
empty:100:0
-0: 0
-1: 100
+ 0: 0
+ 1: 100
diff --git a/test/Transforms/SampleProfile/branch.ll b/test/Transforms/SampleProfile/branch.ll
index 25bd455a044c..1700749f0be9 100644
--- a/test/Transforms/SampleProfile/branch.ll
+++ b/test/Transforms/SampleProfile/branch.ll
@@ -4,14 +4,14 @@
;
; #include <stdio.h>
; #include <stdlib.h>
-;
+
; int main(int argc, char *argv[]) {
; if (argc < 2)
; return 1;
; double result;
; int limit = atoi(argv[1]);
; if (limit > 100) {
-; double s = 23.041968;
+; double s = 23.041968 * atoi(argv[2]);
; for (int u = 0; u < limit; u++) {
; double x = s;
; s = x + 3.049 + (double)u;
@@ -19,7 +19,7 @@
; }
; result = s;
; } else {
-; result = 0;
+; result = atoi(argv[2]);
; }
; printf("result is %lf\n", result);
; return 0;
@@ -27,117 +27,213 @@
@.str = private unnamed_addr constant [15 x i8] c"result is %lf\0A\00", align 1
-; Function Attrs: nounwind uwtable
-define i32 @main(i32 %argc, i8** nocapture readonly %argv) #0 {
+; Function Attrs: uwtable
+define i32 @main(i32 %argc, i8** %argv) #0 !dbg !6 {
; CHECK: Printing analysis 'Branch Probability Analysis' for function 'main':
entry:
- tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !13, metadata !DIExpression()), !dbg !27
- tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !14, metadata !DIExpression()), !dbg !27
- %cmp = icmp slt i32 %argc, 2, !dbg !28
- br i1 %cmp, label %return, label %if.end, !dbg !28
-; CHECK: edge entry -> return probability is 0 / 1 = 0%
-; CHECK: edge entry -> if.end probability is 1 / 1 = 100%
+ %retval = alloca i32, align 4
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca i8**, align 8
+ %result = alloca double, align 8
+ %limit = alloca i32, align 4
+ %s = alloca double, align 8
+ %u = alloca i32, align 4
+ %x = alloca double, align 8
+ store i32 0, i32* %retval, align 4
+ store i32 %argc, i32* %argc.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !16, metadata !17), !dbg !18
+ store i8** %argv, i8*** %argv.addr, align 8
+ call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !19, metadata !17), !dbg !20
+ %0 = load i32, i32* %argc.addr, align 4, !dbg !21
+ %cmp = icmp slt i32 %0, 2, !dbg !23
+ br i1 %cmp, label %if.then, label %if.end, !dbg !24
+; CHECK: edge entry -> if.then probability is 0x4ccccccd / 0x80000000 = 60.00%
+; CHECK: edge entry -> if.end probability is 0x33333333 / 0x80000000 = 40.00%
+
+if.then: ; preds = %entry
+ store i32 1, i32* %retval, align 4, !dbg !25
+ br label %return, !dbg !25
if.end: ; preds = %entry
- %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1, !dbg !30
- %0 = load i8*, i8** %arrayidx, align 8, !dbg !30, !tbaa !31
- %call = tail call i32 @atoi(i8* %0) #4, !dbg !30
- tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !17, metadata !DIExpression()), !dbg !30
- %cmp1 = icmp sgt i32 %call, 100, !dbg !35
- br i1 %cmp1, label %for.body, label %if.end6, !dbg !35
-; CHECK: edge if.end -> for.body probability is 0 / 1 = 0%
-; CHECK: edge if.end -> if.end6 probability is 1 / 1 = 100%
+ call void @llvm.dbg.declare(metadata double* %result, metadata !26, metadata !17), !dbg !27
+ call void @llvm.dbg.declare(metadata i32* %limit, metadata !28, metadata !17), !dbg !29
+ %1 = load i8**, i8*** %argv.addr, align 8, !dbg !30
+ %arrayidx = getelementptr inbounds i8*, i8** %1, i64 1, !dbg !30
+ %2 = load i8*, i8** %arrayidx, align 8, !dbg !30
+ %call = call i32 @atoi(i8* %2) #4, !dbg !31
+ store i32 %call, i32* %limit, align 4, !dbg !29
+ %3 = load i32, i32* %limit, align 4, !dbg !32
+ %cmp1 = icmp sgt i32 %3, 100, !dbg !34
+ br i1 %cmp1, label %if.then.2, label %if.else, !dbg !35
+; CHECK: edge if.end -> if.then.2 probability is 0x66666666 / 0x80000000 = 80.00%
+; CHECK: edge if.end -> if.else probability is 0x1999999a / 0x80000000 = 20.00%
-for.body: ; preds = %if.end, %for.body
- %u.016 = phi i32 [ %inc, %for.body ], [ 0, %if.end ]
- %s.015 = phi double [ %sub, %for.body ], [ 0x40370ABE6A337A81, %if.end ]
- %add = fadd double %s.015, 3.049000e+00, !dbg !36
- %conv = sitofp i32 %u.016 to double, !dbg !36
- %add4 = fadd double %add, %conv, !dbg !36
- tail call void @llvm.dbg.value(metadata double %add4, i64 0, metadata !18, metadata !DIExpression()), !dbg !36
- %div = fdiv double 3.940000e+00, %s.015, !dbg !37
- %mul = fmul double %div, 3.200000e-01, !dbg !37
- %add5 = fadd double %add4, %mul, !dbg !37
- %sub = fsub double %add4, %add5, !dbg !37
- tail call void @llvm.dbg.value(metadata double %sub, i64 0, metadata !18, metadata !DIExpression()), !dbg !37
- %inc = add nsw i32 %u.016, 1, !dbg !38
- tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !21, metadata !DIExpression()), !dbg !38
- %exitcond = icmp eq i32 %inc, %call, !dbg !38
- br i1 %exitcond, label %if.end6, label %for.body, !dbg !38
-; CHECK: edge for.body -> if.end6 probability is 0 / 10226 = 0%
-; CHECK: edge for.body -> for.body probability is 10226 / 10226 = 100% [HOT edge]
+if.then.2: ; preds = %if.end
+ call void @llvm.dbg.declare(metadata double* %s, metadata !36, metadata !17), !dbg !38
+ %4 = load i8**, i8*** %argv.addr, align 8, !dbg !39
+ %arrayidx3 = getelementptr inbounds i8*, i8** %4, i64 2, !dbg !39
+ %5 = load i8*, i8** %arrayidx3, align 8, !dbg !39
+ %call4 = call i32 @atoi(i8* %5) #4, !dbg !40
+ %conv = sitofp i32 %call4 to double, !dbg !40
+ %mul = fmul double 0x40370ABE6A337A81, %conv, !dbg !41
+ store double %mul, double* %s, align 8, !dbg !38
+ call void @llvm.dbg.declare(metadata i32* %u, metadata !42, metadata !17), !dbg !44
+ store i32 0, i32* %u, align 4, !dbg !44
+ br label %for.cond, !dbg !45
-if.end6: ; preds = %for.body, %if.end
- %result.0 = phi double [ 0.000000e+00, %if.end ], [ %sub, %for.body ]
- %call7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i64 0, i64 0), double %result.0), !dbg !39
- br label %return, !dbg !40
-; CHECK: edge if.end6 -> return probability is 16 / 16 = 100% [HOT edge]
+for.cond: ; preds = %for.inc, %if.then.2
+ %6 = load i32, i32* %u, align 4, !dbg !46
+ %7 = load i32, i32* %limit, align 4, !dbg !48
+ %cmp5 = icmp slt i32 %6, %7, !dbg !49
+ br i1 %cmp5, label %for.body, label %for.end, !dbg !50
-return: ; preds = %entry, %if.end6
- %retval.0 = phi i32 [ 0, %if.end6 ], [ 1, %entry ]
- ret i32 %retval.0, !dbg !41
-}
+for.body: ; preds = %for.cond
+ call void @llvm.dbg.declare(metadata double* %x, metadata !51, metadata !17), !dbg !53
+ %8 = load double, double* %s, align 8, !dbg !54
+ store double %8, double* %x, align 8, !dbg !53
+ %9 = load double, double* %x, align 8, !dbg !55
+ %add = fadd double %9, 3.049000e+00, !dbg !56
+ %10 = load i32, i32* %u, align 4, !dbg !57
+ %conv6 = sitofp i32 %10 to double, !dbg !57
+ %add7 = fadd double %add, %conv6, !dbg !58
+ store double %add7, double* %s, align 8, !dbg !59
+ %11 = load double, double* %s, align 8, !dbg !60
+ %12 = load double, double* %x, align 8, !dbg !61
+ %div = fdiv double 3.940000e+00, %12, !dbg !62
+ %mul8 = fmul double %div, 3.200000e-01, !dbg !63
+ %add9 = fadd double %11, %mul8, !dbg !64
+ %13 = load double, double* %s, align 8, !dbg !65
+ %sub = fsub double %13, %add9, !dbg !65
+ store double %sub, double* %s, align 8, !dbg !65
+ br label %for.inc, !dbg !66
-; Function Attrs: nounwind readonly
-declare i32 @atoi(i8* nocapture) #1
+for.inc: ; preds = %for.body
+ %14 = load i32, i32* %u, align 4, !dbg !67
+ %inc = add nsw i32 %14, 1, !dbg !67
+ store i32 %inc, i32* %u, align 4, !dbg !67
+ br label %for.cond, !dbg !68
+
+for.end: ; preds = %for.cond
+ %15 = load double, double* %s, align 8, !dbg !69
+ store double %15, double* %result, align 8, !dbg !70
+ br label %if.end.13, !dbg !71
-; Function Attrs: nounwind
-declare i32 @printf(i8* nocapture readonly, ...) #2
+if.else: ; preds = %if.end
+ %16 = load i8**, i8*** %argv.addr, align 8, !dbg !72
+ %arrayidx10 = getelementptr inbounds i8*, i8** %16, i64 2, !dbg !72
+ %17 = load i8*, i8** %arrayidx10, align 8, !dbg !72
+ %call11 = call i32 @atoi(i8* %17) #4, !dbg !74
+ %conv12 = sitofp i32 %call11 to double, !dbg !74
+ store double %conv12, double* %result, align 8, !dbg !75
+ br label %if.end.13
+
+if.end.13: ; preds = %if.else, %for.end
+ %18 = load double, double* %result, align 8, !dbg !76
+ %call14 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i32 0, i32 0), double %18), !dbg !77
+ store i32 0, i32* %retval, align 4, !dbg !78
+ br label %return, !dbg !78
+
+return: ; preds = %if.end.13, %if.then
+ %19 = load i32, i32* %retval, align 4, !dbg !79
+ ret i32 %19, !dbg !79
+}
; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #3
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind readonly
+declare i32 @atoi(i8*) #2
+
+declare i32 @printf(i8*, ...) #3
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #3 = { nounwind readnone }
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { nounwind readonly }
!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!25, !42}
-!llvm.ident = !{!26}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 (trunk 192896) (llvm/trunk 192895)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "branch.cc", directory: ".")
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248211) (llvm/trunk 248217)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !5)
+!1 = !DIFile(filename: "test.cc", directory: "/ssd/llvm_commit")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "main", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, function: i32 (i32, i8**)* @main, variables: !12)
-!5 = !DIFile(filename: "branch.cc", directory: ".")
-!6 = !DISubroutineType(types: !7)
-!7 = !{!8, !8, !9}
-!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !10)
-!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !11)
-!11 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!12 = !{!13, !14, !15, !17, !18, !21, !23}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 4, arg: 1, scope: !4, file: !5, type: !8)
-!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 4, arg: 2, scope: !4, file: !5, type: !9)
-!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "result", line: 7, scope: !4, file: !5, type: !16)
-!16 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
-!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "limit", line: 8, scope: !4, file: !5, type: !8)
-!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "s", line: 10, scope: !19, file: !5, type: !16)
-!19 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !20)
-!20 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !4)
-!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "u", line: 11, scope: !22, file: !5, type: !8)
-!22 = distinct !DILexicalBlock(line: 11, column: 0, file: !1, scope: !19)
-!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 12, scope: !24, file: !5, type: !16)
-!24 = distinct !DILexicalBlock(line: 11, column: 0, file: !1, scope: !22)
-!25 = !{i32 2, !"Dwarf Version", i32 4}
-!26 = !{!"clang version 3.4 (trunk 192896) (llvm/trunk 192895)"}
-!27 = !DILocation(line: 4, scope: !4)
-!28 = !DILocation(line: 5, scope: !29)
-!29 = distinct !DILexicalBlock(line: 5, column: 0, file: !1, scope: !4)
-!30 = !DILocation(line: 8, scope: !4)
-!31 = !{!32, !32, i64 0}
-!32 = !{!"any pointer", !33, i64 0}
-!33 = !{!"omnipotent char", !34, i64 0}
-!34 = !{!"Simple C/C++ TBAA"}
-!35 = !DILocation(line: 9, scope: !20)
-!36 = !DILocation(line: 13, scope: !24)
-!37 = !DILocation(line: 14, scope: !24)
-!38 = !DILocation(line: 11, scope: !22)
-!39 = !DILocation(line: 20, scope: !4)
-!40 = !DILocation(line: 21, scope: !4)
-!41 = !DILocation(line: 22, scope: !4)
-!42 = !{i32 1, !"Debug Info Version", i32 3}
+!4 = !DIBasicType(name: "double", size: 64, align: 64, encoding: DW_ATE_float)
+!5 = !{!6}
+!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 4, type: !7, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !9, !10}
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64, align: 64)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64, align: 64)
+!12 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.8.0 (trunk 248211) (llvm/trunk 248217)"}
+!16 = !DILocalVariable(name: "argc", arg: 1, scope: !6, file: !1, line: 4, type: !9)
+!17 = !DIExpression()
+!18 = !DILocation(line: 4, column: 15, scope: !6)
+!19 = !DILocalVariable(name: "argv", arg: 2, scope: !6, file: !1, line: 4, type: !10)
+!20 = !DILocation(line: 4, column: 27, scope: !6)
+!21 = !DILocation(line: 5, column: 8, scope: !22)
+!22 = distinct !DILexicalBlock(scope: !6, file: !1, line: 5, column: 8)
+!23 = !DILocation(line: 5, column: 13, scope: !22)
+!24 = !DILocation(line: 5, column: 8, scope: !6)
+!25 = !DILocation(line: 6, column: 6, scope: !22)
+!26 = !DILocalVariable(name: "result", scope: !6, file: !1, line: 7, type: !4)
+!27 = !DILocation(line: 7, column: 11, scope: !6)
+!28 = !DILocalVariable(name: "limit", scope: !6, file: !1, line: 8, type: !9)
+!29 = !DILocation(line: 8, column: 8, scope: !6)
+!30 = !DILocation(line: 8, column: 21, scope: !6)
+!31 = !DILocation(line: 8, column: 16, scope: !6)
+!32 = !DILocation(line: 9, column: 8, scope: !33)
+!33 = distinct !DILexicalBlock(scope: !6, file: !1, line: 9, column: 8)
+!34 = !DILocation(line: 9, column: 14, scope: !33)
+!35 = !DILocation(line: 9, column: 8, scope: !6)
+!36 = !DILocalVariable(name: "s", scope: !37, file: !1, line: 10, type: !4)
+!37 = distinct !DILexicalBlock(scope: !33, file: !1, line: 9, column: 21)
+!38 = !DILocation(line: 10, column: 13, scope: !37)
+!39 = !DILocation(line: 10, column: 34, scope: !37)
+!40 = !DILocation(line: 10, column: 29, scope: !37)
+!41 = !DILocation(line: 10, column: 27, scope: !37)
+!42 = !DILocalVariable(name: "u", scope: !43, file: !1, line: 11, type: !9)
+!43 = distinct !DILexicalBlock(scope: !37, file: !1, line: 11, column: 6)
+!44 = !DILocation(line: 11, column: 15, scope: !43)
+!45 = !DILocation(line: 11, column: 11, scope: !43)
+!46 = !DILocation(line: 11, column: 22, scope: !47)
+!47 = distinct !DILexicalBlock(scope: !43, file: !1, line: 11, column: 6)
+!48 = !DILocation(line: 11, column: 26, scope: !47)
+!49 = !DILocation(line: 11, column: 24, scope: !47)
+!50 = !DILocation(line: 11, column: 6, scope: !43)
+!51 = !DILocalVariable(name: "x", scope: !52, file: !1, line: 12, type: !4)
+!52 = distinct !DILexicalBlock(scope: !47, file: !1, line: 11, column: 38)
+!53 = !DILocation(line: 12, column: 15, scope: !52)
+!54 = !DILocation(line: 12, column: 19, scope: !52)
+!55 = !DILocation(line: 13, column: 12, scope: !52)
+!56 = !DILocation(line: 13, column: 14, scope: !52)
+!57 = !DILocation(line: 13, column: 32, scope: !52)
+!58 = !DILocation(line: 13, column: 22, scope: !52)
+!59 = !DILocation(line: 13, column: 10, scope: !52)
+!60 = !DILocation(line: 14, column: 13, scope: !52)
+!61 = !DILocation(line: 14, column: 24, scope: !52)
+!62 = !DILocation(line: 14, column: 22, scope: !52)
+!63 = !DILocation(line: 14, column: 26, scope: !52)
+!64 = !DILocation(line: 14, column: 15, scope: !52)
+!65 = !DILocation(line: 14, column: 10, scope: !52)
+!66 = !DILocation(line: 15, column: 6, scope: !52)
+!67 = !DILocation(line: 11, column: 34, scope: !47)
+!68 = !DILocation(line: 11, column: 6, scope: !47)
+!69 = !DILocation(line: 16, column: 15, scope: !37)
+!70 = !DILocation(line: 16, column: 13, scope: !37)
+!71 = !DILocation(line: 17, column: 4, scope: !37)
+!72 = !DILocation(line: 18, column: 20, scope: !73)
+!73 = distinct !DILexicalBlock(scope: !33, file: !1, line: 17, column: 11)
+!74 = !DILocation(line: 18, column: 15, scope: !73)
+!75 = !DILocation(line: 18, column: 13, scope: !73)
+!76 = !DILocation(line: 20, column: 30, scope: !6)
+!77 = !DILocation(line: 20, column: 4, scope: !6)
+!78 = !DILocation(line: 21, column: 4, scope: !6)
+!79 = !DILocation(line: 22, column: 2, scope: !6)
diff --git a/test/Transforms/SampleProfile/calls.ll b/test/Transforms/SampleProfile/calls.ll
index 10f43a1a6126..53ea9297d7d0 100644
--- a/test/Transforms/SampleProfile/calls.ll
+++ b/test/Transforms/SampleProfile/calls.ll
@@ -24,7 +24,7 @@
@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
; Function Attrs: nounwind uwtable
-define i32 @_Z3sumii(i32 %x, i32 %y) {
+define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 {
entry:
%x.addr = alloca i32, align 4
%y.addr = alloca i32, align 4
@@ -37,7 +37,7 @@ entry:
}
; Function Attrs: uwtable
-define i32 @main() {
+define i32 @main() !dbg !7 {
entry:
%retval = alloca i32, align 4
%s = alloca i32, align 4
@@ -52,8 +52,8 @@ while.cond: ; preds = %if.end, %entry
store i32 %inc, i32* %i, align 4, !dbg !14
%cmp = icmp slt i32 %0, 400000000, !dbg !14
br i1 %cmp, label %while.body, label %while.end, !dbg !14
-; CHECK: edge while.cond -> while.body probability is 5391 / 5391 = 100% [HOT edge]
-; CHECK: edge while.cond -> while.end probability is 0 / 5391 = 0%
+; CHECK: edge while.cond -> while.body probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge while.cond -> while.end probability is 0x00000000 / 0x80000000 = 0.00%
while.body: ; preds = %while.cond
%1 = load i32, i32* %i, align 4, !dbg !16
@@ -63,8 +63,8 @@ while.body: ; preds = %while.cond
; both branches out of while.body had the same weight. In reality,
; the edge while.body->if.then is taken most of the time.
;
-; CHECK: edge while.body -> if.then probability is 5752 / 5752 = 100% [HOT edge]
-; CHECK: edge while.body -> if.else probability is 0 / 5752 = 0%
+; CHECK: edge while.body -> if.then probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge while.body -> if.else probability is 0x00000000 / 0x80000000 = 0.00%
if.then: ; preds = %while.body
@@ -92,14 +92,14 @@ declare i32 @printf(i8*, ...) #2
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "calls.cc", directory: ".")
!2 = !{}
!3 = !{!4, !7}
-!4 = !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: i32 (i32, i32)* @_Z3sumii, variables: !2)
+!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "calls.cc", directory: ".")
!6 = !DISubroutineType(types: !2)
-!7 = !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
!8 = !{i32 2, !"Dwarf Version", i32 4}
!9 = !{i32 1, !"Debug Info Version", i32 3}
!10 = !{!"clang version 3.5 "}
diff --git a/test/Transforms/SampleProfile/cov-zero-samples.ll b/test/Transforms/SampleProfile/cov-zero-samples.ll
new file mode 100644
index 000000000000..d81e6438ee01
--- /dev/null
+++ b/test/Transforms/SampleProfile/cov-zero-samples.ll
@@ -0,0 +1,142 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/cov-zero-samples.prof -sample-profile-check-record-coverage=100 -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
+;
+; CHECK: remark: cov-zero-samples.cc:9:25: Applied 404065 samples from profile (offset: 2.1)
+; CHECK: remark: cov-zero-samples.cc:10:9: Applied 443089 samples from profile (offset: 3)
+; CHECK: remark: cov-zero-samples.cc:10:36: Applied 0 samples from profile (offset: 3.1)
+; CHECK: remark: cov-zero-samples.cc:11:12: Applied 404066 samples from profile (offset: 4)
+; CHECK: remark: cov-zero-samples.cc:13:25: Applied 0 samples from profile (offset: 6)
+; CHECK: remark: cov-zero-samples.cc:14:3: Applied 0 samples from profile (offset: 7)
+; CHECK: remark: cov-zero-samples.cc:10:9: most popular destination for conditional branches at cov-zero-samples.cc:9:3
+; CHECK: remark: cov-zero-samples.cc:11:12: most popular destination for conditional branches at cov-zero-samples.cc:10:9
+;
+; Coverage for this profile should be 100%
+; CHECK-NOT: warning: cov-zero-samples.cc:1:
+
+@N = global i64 8000000000, align 8
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z12never_calledi(i32 %i) !dbg !4 {
+entry:
+ ret i32 0, !dbg !32
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+; Function Attrs: norecurse uwtable
+define i32 @main() !dbg !8 {
+entry:
+ %retval = alloca i32, align 4
+ %sum = alloca i32, align 4
+ %i = alloca i64, align 8
+ store i32 0, i32* %retval, align 4
+ call void @llvm.dbg.declare(metadata i32* %sum, metadata !33, metadata !19), !dbg !34
+ store i32 0, i32* %sum, align 4, !dbg !34
+ call void @llvm.dbg.declare(metadata i64* %i, metadata !35, metadata !19), !dbg !37
+ store i64 0, i64* %i, align 8, !dbg !37
+ br label %for.cond, !dbg !38
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i64, i64* %i, align 8, !dbg !39
+ %1 = load volatile i64, i64* @N, align 8, !dbg !42
+ %cmp = icmp slt i64 %0, %1, !dbg !43
+ br i1 %cmp, label %for.body, label %for.end, !dbg !44
+
+for.body: ; preds = %for.cond
+ %2 = load i64, i64* %i, align 8, !dbg !45
+ %3 = load volatile i64, i64* @N, align 8, !dbg !48
+ %cmp1 = icmp sgt i64 %2, %3, !dbg !49
+ br i1 %cmp1, label %if.then, label %if.end, !dbg !50
+
+if.then: ; preds = %for.body
+ %4 = load i64, i64* %i, align 8, !dbg !51
+ %conv = trunc i64 %4 to i32, !dbg !51
+ %call = call i32 @_Z12never_calledi(i32 %conv), !dbg !53
+ %5 = load i32, i32* %sum, align 4, !dbg !54
+ %add = add nsw i32 %5, %call, !dbg !54
+ store i32 %add, i32* %sum, align 4, !dbg !54
+ br label %if.end, !dbg !55
+
+if.end: ; preds = %if.then, %for.body
+ %6 = load i64, i64* %i, align 8, !dbg !56
+ %div = sdiv i64 %6, 239, !dbg !57
+ %7 = load i32, i32* %sum, align 4, !dbg !58
+ %conv2 = sext i32 %7 to i64, !dbg !58
+ %mul = mul nsw i64 %conv2, %div, !dbg !58
+ %conv3 = trunc i64 %mul to i32, !dbg !58
+ store i32 %conv3, i32* %sum, align 4, !dbg !58
+ br label %for.inc, !dbg !59
+
+for.inc: ; preds = %if.end
+ %8 = load i64, i64* %i, align 8, !dbg !60
+ %inc = add nsw i64 %8, 1, !dbg !60
+ store i64 %inc, i64* %i, align 8, !dbg !60
+ br label %for.cond, !dbg !62
+
+for.end: ; preds = %for.cond
+ %9 = load i32, i32* %sum, align 4, !dbg !63
+ %call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %9), !dbg !64
+ ret i32 0, !dbg !65
+}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!15, !16}
+!llvm.ident = !{!17}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 253667) (llvm/trunk 253670)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, globals: !11)
+!1 = !DIFile(filename: "cov-zero-samples.cc", directory: ".")
+!2 = !{}
+!3 = !{!4, !8}
+!4 = distinct !DISubprogram(name: "never_called", linkageName: "_Z12never_calledi", scope: !1, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !9, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!7}
+!11 = !{!12}
+!12 = !DIGlobalVariable(name: "N", scope: !0, file: !1, line: 3, type: !13, isLocal: false, isDefinition: true, variable: i64* @N)
+!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !14)
+!14 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!15 = !{i32 2, !"Dwarf Version", i32 4}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{!"clang version 3.8.0 (trunk 253667) (llvm/trunk 253670)"}
+!19 = !DIExpression()
+!31 = !DILexicalBlockFile(scope: !4, file: !1, discriminator: 3)
+!32 = !DILocation(line: 5, column: 27, scope: !31)
+!33 = !DILocalVariable(name: "sum", scope: !8, file: !1, line: 8, type: !7)
+!34 = !DILocation(line: 8, column: 7, scope: !8)
+!35 = !DILocalVariable(name: "i", scope: !36, file: !1, line: 9, type: !14)
+!36 = distinct !DILexicalBlock(scope: !8, file: !1, line: 9, column: 3)
+!37 = !DILocation(line: 9, column: 18, scope: !36)
+!38 = !DILocation(line: 9, column: 8, scope: !36)
+!39 = !DILocation(line: 9, column: 25, scope: !40)
+!40 = !DILexicalBlockFile(scope: !41, file: !1, discriminator: 1)
+!41 = distinct !DILexicalBlock(scope: !36, file: !1, line: 9, column: 3)
+!42 = !DILocation(line: 9, column: 29, scope: !40)
+!43 = !DILocation(line: 9, column: 27, scope: !40)
+!44 = !DILocation(line: 9, column: 3, scope: !40)
+!45 = !DILocation(line: 10, column: 9, scope: !46)
+!46 = distinct !DILexicalBlock(scope: !47, file: !1, line: 10, column: 9)
+!47 = distinct !DILexicalBlock(scope: !41, file: !1, line: 9, column: 37)
+!48 = !DILocation(line: 10, column: 13, scope: !46)
+!49 = !DILocation(line: 10, column: 11, scope: !46)
+!50 = !DILocation(line: 10, column: 9, scope: !47)
+!51 = !DILocation(line: 10, column: 36, scope: !52)
+!52 = !DILexicalBlockFile(scope: !46, file: !1, discriminator: 1)
+!53 = !DILocation(line: 10, column: 23, scope: !52)
+!54 = !DILocation(line: 10, column: 20, scope: !52)
+!55 = !DILocation(line: 10, column: 16, scope: !52)
+!56 = !DILocation(line: 11, column: 12, scope: !47)
+!57 = !DILocation(line: 11, column: 14, scope: !47)
+!58 = !DILocation(line: 11, column: 9, scope: !47)
+!59 = !DILocation(line: 12, column: 3, scope: !47)
+!60 = !DILocation(line: 9, column: 33, scope: !61)
+!61 = !DILexicalBlockFile(scope: !41, file: !1, discriminator: 2)
+!62 = !DILocation(line: 9, column: 3, scope: !61)
+!63 = !DILocation(line: 13, column: 25, scope: !8)
+!64 = !DILocation(line: 13, column: 3, scope: !8)
+!65 = !DILocation(line: 14, column: 3, scope: !8)
diff --git a/test/Transforms/SampleProfile/coverage-warning.ll b/test/Transforms/SampleProfile/coverage-warning.ll
new file mode 100644
index 000000000000..14a2710b0810
--- /dev/null
+++ b/test/Transforms/SampleProfile/coverage-warning.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/coverage-warning.prof -sample-profile-check-record-coverage=90 -sample-profile-check-sample-coverage=100 -o /dev/null 2>&1 | FileCheck %s
+define i32 @foo(i32 %i) !dbg !4 {
+; The profile has samples for line locations that are no longer present.
+; Record coverage (66%) is below the requested 90% and sample coverage (94%) is below the requested 100%, so we should get both of these warnings:
+;
+; CHECK: warning: coverage-warning.c:1: 2 of 3 available profile records (66%) were applied
+; CHECK: warning: coverage-warning.c:1: 29000 of 30700 available profile samples (94%) were applied
+entry:
+ %retval = alloca i32, align 4
+ %i.addr = alloca i32, align 4
+ store i32 %i, i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4, !dbg !9
+ %cmp = icmp sgt i32 %0, 1000, !dbg !10
+ br i1 %cmp, label %if.then, label %if.end, !dbg !9
+
+if.then: ; preds = %entry
+ store i32 30, i32* %retval, align 4, !dbg !11
+ br label %return, !dbg !11
+
+if.end: ; preds = %entry
+ store i32 3, i32* %retval, align 4, !dbg !12
+ br label %return, !dbg !12
+
+return: ; preds = %if.end, %if.then
+ %1 = load i32, i32* %retval, align 4, !dbg !13
+ ret i32 %1, !dbg !13
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251524) (llvm/trunk 251531)", isOptimized: false, runtimeVersion: 0, emissionKind: 2, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "coverage-warning.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !2)
+!6 = !{i32 2, !"Dwarf Version", i32 4}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{!"clang version 3.8.0 (trunk 251524) (llvm/trunk 251531)"}
+!9 = !DILocation(line: 2, column: 7, scope: !4)
+!10 = !DILocation(line: 2, column: 9, scope: !4)
+!11 = !DILocation(line: 3, column: 5, scope: !4)
+!12 = !DILocation(line: 4, column: 3, scope: !4)
+!13 = !DILocation(line: 5, column: 1, scope: !4)
diff --git a/test/Transforms/SampleProfile/discriminator.ll b/test/Transforms/SampleProfile/discriminator.ll
index 2445c5c5d609..0915fc884f82 100644
--- a/test/Transforms/SampleProfile/discriminator.ll
+++ b/test/Transforms/SampleProfile/discriminator.ll
@@ -21,7 +21,7 @@
; This means that the predicate 'i < 5' (line 3) is executed 100 times,
; but the then branch (line 3.1) is only executed 5 times.
-define i32 @foo(i32 %i) #0 {
+define i32 @foo(i32 %i) #0 !dbg !4 {
; CHECK: Printing analysis 'Branch Probability Analysis' for function 'foo':
entry:
%i.addr = alloca i32, align 4
@@ -34,15 +34,15 @@ while.cond: ; preds = %if.end, %entry
%0 = load i32, i32* %i.addr, align 4, !dbg !12
%cmp = icmp slt i32 %0, 100, !dbg !12
br i1 %cmp, label %while.body, label %while.end, !dbg !12
-; CHECK: edge while.cond -> while.body probability is 100 / 101 = 99.0099% [HOT edge]
-; CHECK: edge while.cond -> while.end probability is 1 / 101 = 0.990099%
+; CHECK: edge while.cond -> while.body probability is 0x7ebb907a / 0x80000000 = 99.01% [HOT edge]
+; CHECK: edge while.cond -> while.end probability is 0x01446f86 / 0x80000000 = 0.99%
while.body: ; preds = %while.cond
%1 = load i32, i32* %i.addr, align 4, !dbg !14
%cmp1 = icmp slt i32 %1, 50, !dbg !14
br i1 %cmp1, label %if.then, label %if.end, !dbg !14
-; CHECK: edge while.body -> if.then probability is 5 / 100 = 5%
-; CHECK: edge while.body -> if.end probability is 95 / 100 = 95% [HOT edge]
+; CHECK: edge while.body -> if.then probability is 0x06666666 / 0x80000000 = 5.00%
+; CHECK: edge while.body -> if.end probability is 0x7999999a / 0x80000000 = 95.00% [HOT edge]
if.then: ; preds = %while.body
%2 = load i32, i32* %x, align 4, !dbg !17
@@ -66,11 +66,11 @@ while.end: ; preds = %while.cond
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "discriminator.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: i32 (i32)* @foo, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "discriminator.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/Transforms/SampleProfile/entry_counts.ll b/test/Transforms/SampleProfile/entry_counts.ll
index bf66e693a9d1..50cd575295a9 100644
--- a/test/Transforms/SampleProfile/entry_counts.ll
+++ b/test/Transforms/SampleProfile/entry_counts.ll
@@ -3,7 +3,7 @@
; According to the profile, function empty() was called 13,293 times.
; CHECK: {{.*}} = !{!"function_entry_count", i64 13293}
-define void @empty() {
+define void @empty() !dbg !4 {
entry:
ret void, !dbg !9
}
@@ -12,11 +12,11 @@ entry:
!llvm.module.flags = !{!6, !7}
!llvm.ident = !{!8}
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 237249) (llvm/trunk 237261)", isOptimized: false, runtimeVersion: 0, emissionKind: 2, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
-!1 = !DIFile(filename: "entry_counts.c", directory: "/usr/local/google/home/dnovillo/llvm/test/pgo")
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 237249) (llvm/trunk 237261)", isOptimized: false, runtimeVersion: 0, emissionKind: 2, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "entry_counts.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "empty", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, function: void ()* @empty, variables: !2)
+!4 = distinct !DISubprogram(name: "empty", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2)
!5 = !DISubroutineType(types: !2)
!6 = !{i32 2, !"Dwarf Version", i32 4}
!7 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/SampleProfile/fnptr.ll b/test/Transforms/SampleProfile/fnptr.ll
index 368da42fc8a1..7b07ca9679bb 100644
--- a/test/Transforms/SampleProfile/fnptr.ll
+++ b/test/Transforms/SampleProfile/fnptr.ll
@@ -5,12 +5,12 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s
-; CHECK: edge for.body3 -> if.then probability is 534 / 2598 = 20.5543%
-; CHECK: edge for.body3 -> if.else probability is 2064 / 2598 = 79.4457%
-; CHECK: edge for.inc -> for.inc12 probability is 1052 / 2598 = 40.4927%
-; CHECK: edge for.inc -> for.body3 probability is 1546 / 2598 = 59.5073%
-; CHECK: edge for.inc12 -> for.end14 probability is 518 / 1052 = 49.2395%
-; CHECK: edge for.inc12 -> for.cond1.preheader probability is 534 / 1052 = 50.7605%
+; CHECK: edge for.body3 -> if.then probability is 0x1a4f3959 / 0x80000000 = 20.55%
+; CHECK: edge for.body3 -> if.else probability is 0x65b0c6a7 / 0x80000000 = 79.45%
+; CHECK: edge for.inc -> for.inc12 probability is 0x33d4a4c1 / 0x80000000 = 40.49%
+; CHECK: edge for.inc -> for.body3 probability is 0x4c2b5b3f / 0x80000000 = 59.51%
+; CHECK: edge for.inc12 -> for.end14 probability is 0x3f06d04e / 0x80000000 = 49.24%
+; CHECK: edge for.inc12 -> for.cond1.preheader probability is 0x40f92fb2 / 0x80000000 = 50.76%
; Original C++ test case.
;
@@ -46,7 +46,7 @@
@.str = private unnamed_addr constant [9 x i8] c"S = %lf\0A\00", align 1
-define double @_Z3fooi(i32 %x) #0 {
+define double @_Z3fooi(i32 %x) #0 !dbg !3 {
entry:
%conv = sitofp i32 %x to double, !dbg !2
%call = tail call double @sin(double %conv) #3, !dbg !8
@@ -56,7 +56,7 @@ entry:
declare double @sin(double) #1
-define double @_Z3bari(i32 %x) #0 {
+define double @_Z3bari(i32 %x) #0 !dbg !10 {
entry:
%conv = sitofp i32 %x to double, !dbg !9
%call = tail call double @cos(double %conv) #3, !dbg !11
@@ -66,7 +66,7 @@ entry:
declare double @cos(double) #1
-define i32 @main() #2 {
+define i32 @main() #2 !dbg !13 {
entry:
br label %for.cond1.preheader, !dbg !12
@@ -130,17 +130,17 @@ declare i32 @printf(i8* nocapture readonly, ...) #1
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !{!"clang version 3.6.0 "}
!2 = !DILocation(line: 9, column: 3, scope: !3)
-!3 = !DISubprogram(name: "foo", line: 8, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !4, scope: !5, type: !6, function: double (i32)* @_Z3fooi, variables: !7)
+!3 = distinct !DISubprogram(name: "foo", line: 8, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !4, scope: !5, type: !6, variables: !7)
!4 = !DIFile(filename: "fnptr.cc", directory: ".")
!5 = !DIFile(filename: "fnptr.cc", directory: ".")
!6 = !DISubroutineType(types: !7)
!7 = !{}
!8 = !DILocation(line: 9, column: 14, scope: !3)
!9 = !DILocation(line: 13, column: 3, scope: !10)
-!10 = !DISubprogram(name: "bar", line: 12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !4, scope: !5, type: !6, function: double (i32)* @_Z3bari, variables: !7)
+!10 = distinct !DISubprogram(name: "bar", line: 12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !4, scope: !5, type: !6, variables: !7)
!11 = !DILocation(line: 13, column: 14, scope: !10)
!12 = !DILocation(line: 19, column: 3, scope: !13)
-!13 = !DISubprogram(name: "main", line: 16, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !4, scope: !5, type: !6, function: i32 ()* @main, variables: !7)
+!13 = distinct !DISubprogram(name: "main", line: 16, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !4, scope: !5, type: !6, variables: !7)
!14 = !DILocation(line: 20, column: 5, scope: !13)
!15 = !DILocation(line: 21, column: 15, scope: !13)
!16 = !DILocation(line: 22, column: 11, scope: !13)
diff --git a/test/Transforms/SampleProfile/gcc-simple.ll b/test/Transforms/SampleProfile/gcc-simple.ll
new file mode 100644
index 000000000000..1ae927158c11
--- /dev/null
+++ b/test/Transforms/SampleProfile/gcc-simple.ll
@@ -0,0 +1,218 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/gcc-simple.afdo -S | FileCheck %s
+; XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
+; Original code:
+;
+; #include <stdlib.h>
+;
+; long long int foo(long i) {
+; if (rand() < 500) return 2; else if (rand() > 5000) return 10; else return 90;
+; }
+;
+; int main() {
+; long long int sum = 0;
+; for (int k = 0; k < 3000; k++)
+; for (int i = 0; i < 200000; i++) sum += foo(i);
+; return sum > 0 ? 0 : 1;
+; }
+;
+; This test was compiled down to LLVM IR at -O0 to avoid inlining foo() into
+; main(). The profile was generated using a GCC-generated binary (also compiled
+; at -O0). The conversion from the Linux Perf profile to the GCC autofdo
+; profile used the converter at https://github.com/google/autofdo
+;
+; $ gcc -g -O0 gcc-simple.cc -o gcc-simple
+; $ perf record -b ./gcc-simple
+; $ create_gcov --binary=gcc-simple --gcov=gcc-simple.afdo
+
+define i64 @_Z3fool(i64 %i) #0 !dbg !4 {
+; CHECK: !prof ![[EC1:[0-9]+]]
+entry:
+ %retval = alloca i64, align 8
+ %i.addr = alloca i64, align 8
+ store i64 %i, i64* %i.addr, align 8
+ call void @llvm.dbg.declare(metadata i64* %i.addr, metadata !16, metadata !17), !dbg !18
+ %call = call i32 @rand() #3, !dbg !19
+ %cmp = icmp slt i32 %call, 500, !dbg !21
+ br i1 %cmp, label %if.then, label %if.else, !dbg !22
+; CHECK: !prof ![[PROF1:[0-9]+]]
+
+if.then: ; preds = %entry
+ store i64 2, i64* %retval, align 8, !dbg !23
+ br label %return, !dbg !23
+
+if.else: ; preds = %entry
+ %call1 = call i32 @rand() #3, !dbg !25
+ %cmp2 = icmp sgt i32 %call1, 5000, !dbg !28
+ br i1 %cmp2, label %if.then.3, label %if.else.4, !dbg !29
+; CHECK: !prof ![[PROF2:[0-9]+]]
+
+if.then.3: ; preds = %if.else
+ store i64 10, i64* %retval, align 8, !dbg !30
+ br label %return, !dbg !30
+
+if.else.4: ; preds = %if.else
+ store i64 90, i64* %retval, align 8, !dbg !32
+ br label %return, !dbg !32
+
+return: ; preds = %if.else.4, %if.then.3, %if.then
+ %0 = load i64, i64* %retval, align 8, !dbg !34
+ ret i64 %0, !dbg !34
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind
+declare i32 @rand() #2
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 !dbg !9 {
+; CHECK: !prof ![[EC2:[0-9]+]]
+entry:
+ %retval = alloca i32, align 4
+ %sum = alloca i64, align 8
+ %k = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ call void @llvm.dbg.declare(metadata i64* %sum, metadata !35, metadata !17), !dbg !36
+ store i64 0, i64* %sum, align 8, !dbg !36
+ call void @llvm.dbg.declare(metadata i32* %k, metadata !37, metadata !17), !dbg !39
+ store i32 0, i32* %k, align 4, !dbg !39
+ br label %for.cond, !dbg !40
+
+for.cond: ; preds = %for.inc.4, %entry
+ %0 = load i32, i32* %k, align 4, !dbg !41
+ %cmp = icmp slt i32 %0, 3000, !dbg !45
+ br i1 %cmp, label %for.body, label %for.end.6, !dbg !46
+; CHECK: !prof ![[PROF3:[0-9]+]]
+
+for.body: ; preds = %for.cond
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !47, metadata !17), !dbg !49
+ store i32 0, i32* %i, align 4, !dbg !49
+ br label %for.cond.1, !dbg !50
+
+for.cond.1: ; preds = %for.inc, %for.body
+ %1 = load i32, i32* %i, align 4, !dbg !51
+ %cmp2 = icmp slt i32 %1, 200000, !dbg !55
+ br i1 %cmp2, label %for.body.3, label %for.end, !dbg !56
+; CHECK: !prof ![[PROF4:[0-9]+]]
+
+for.body.3: ; preds = %for.cond.1
+ %2 = load i32, i32* %i, align 4, !dbg !57
+ %conv = sext i32 %2 to i64, !dbg !57
+ %call = call i64 @_Z3fool(i64 %conv), !dbg !59
+ %3 = load i64, i64* %sum, align 8, !dbg !60
+ %add = add nsw i64 %3, %call, !dbg !60
+ store i64 %add, i64* %sum, align 8, !dbg !60
+ br label %for.inc, !dbg !61
+
+for.inc: ; preds = %for.body.3
+ %4 = load i32, i32* %i, align 4, !dbg !62
+ %inc = add nsw i32 %4, 1, !dbg !62
+ store i32 %inc, i32* %i, align 4, !dbg !62
+ br label %for.cond.1, !dbg !64
+
+for.end: ; preds = %for.cond.1
+ br label %for.inc.4, !dbg !65
+
+for.inc.4: ; preds = %for.end
+ %5 = load i32, i32* %k, align 4, !dbg !67
+ %inc5 = add nsw i32 %5, 1, !dbg !67
+ store i32 %inc5, i32* %k, align 4, !dbg !67
+ br label %for.cond, !dbg !68
+
+for.end.6: ; preds = %for.cond
+ %6 = load i64, i64* %sum, align 8, !dbg !69
+ %cmp7 = icmp sgt i64 %6, 0, !dbg !70
+ %cond = select i1 %cmp7, i32 0, i32 1, !dbg !69
+ ret i32 %cond, !dbg !71
+}
+
+; CHECK: ![[EC1]] = !{!"function_entry_count", i64 24108}
+; CHECK: ![[PROF1]] = !{!"branch_weights", i32 1, i32 30124}
+; CHECK: ![[PROF2]] = !{!"branch_weights", i32 30177, i32 29579}
+; CHECK: ![[EC2]] = !{!"function_entry_count", i64 0}
+; CHECK: ![[PROF3]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK: ![[PROF4]] = !{!"branch_weights", i32 1, i32 20238}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 247554) (llvm/trunk 247557)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "discriminator.cc", directory: "/usr/local/google/home/dnovillo/llvm/test/autofdo")
+!2 = !{}
+!3 = !{!4, !9}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !8}
+!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!8 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!10 = !DISubroutineType(types: !11)
+!11 = !{!12}
+!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.8.0 (trunk 247554) (llvm/trunk 247557)"}
+!16 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 3, type: !8)
+!17 = !DIExpression()
+!18 = !DILocation(line: 3, column: 24, scope: !4)
+!19 = !DILocation(line: 4, column: 7, scope: !20)
+!20 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 7)
+!21 = !DILocation(line: 4, column: 14, scope: !20)
+!22 = !DILocation(line: 4, column: 7, scope: !4)
+!23 = !DILocation(line: 4, column: 21, scope: !24)
+!24 = !DILexicalBlockFile(scope: !20, file: !1, discriminator: 1)
+!25 = !DILocation(line: 4, column: 40, scope: !26)
+!26 = !DILexicalBlockFile(scope: !27, file: !1, discriminator: 2)
+!27 = distinct !DILexicalBlock(scope: !20, file: !1, line: 4, column: 40)
+!28 = !DILocation(line: 4, column: 47, scope: !27)
+!29 = !DILocation(line: 4, column: 40, scope: !20)
+!30 = !DILocation(line: 4, column: 55, scope: !31)
+!31 = !DILexicalBlockFile(scope: !27, file: !1, discriminator: 3)
+!32 = !DILocation(line: 4, column: 71, scope: !33)
+!33 = !DILexicalBlockFile(scope: !27, file: !1, discriminator: 4)
+!34 = !DILocation(line: 5, column: 1, scope: !4)
+!35 = !DILocalVariable(name: "sum", scope: !9, file: !1, line: 8, type: !7)
+!36 = !DILocation(line: 8, column: 17, scope: !9)
+!37 = !DILocalVariable(name: "k", scope: !38, file: !1, line: 9, type: !12)
+!38 = distinct !DILexicalBlock(scope: !9, file: !1, line: 9, column: 3)
+!39 = !DILocation(line: 9, column: 12, scope: !38)
+!40 = !DILocation(line: 9, column: 8, scope: !38)
+!41 = !DILocation(line: 9, column: 19, scope: !42)
+!42 = !DILexicalBlockFile(scope: !43, file: !1, discriminator: 2)
+!43 = !DILexicalBlockFile(scope: !44, file: !1, discriminator: 1)
+!44 = distinct !DILexicalBlock(scope: !38, file: !1, line: 9, column: 3)
+!45 = !DILocation(line: 9, column: 21, scope: !44)
+!46 = !DILocation(line: 9, column: 3, scope: !38)
+!47 = !DILocalVariable(name: "i", scope: !48, file: !1, line: 10, type: !12)
+!48 = distinct !DILexicalBlock(scope: !44, file: !1, line: 10, column: 5)
+!49 = !DILocation(line: 10, column: 14, scope: !48)
+!50 = !DILocation(line: 10, column: 10, scope: !48)
+!51 = !DILocation(line: 10, column: 21, scope: !52)
+!52 = !DILexicalBlockFile(scope: !53, file: !1, discriminator: 5)
+!53 = !DILexicalBlockFile(scope: !54, file: !1, discriminator: 1)
+!54 = distinct !DILexicalBlock(scope: !48, file: !1, line: 10, column: 5)
+!55 = !DILocation(line: 10, column: 23, scope: !54)
+!56 = !DILocation(line: 10, column: 5, scope: !48)
+!57 = !DILocation(line: 10, column: 49, scope: !58)
+!58 = !DILexicalBlockFile(scope: !54, file: !1, discriminator: 2)
+!59 = !DILocation(line: 10, column: 45, scope: !54)
+!60 = !DILocation(line: 10, column: 42, scope: !54)
+!61 = !DILocation(line: 10, column: 38, scope: !54)
+!62 = !DILocation(line: 10, column: 34, scope: !63)
+!63 = !DILexicalBlockFile(scope: !54, file: !1, discriminator: 4)
+!64 = !DILocation(line: 10, column: 5, scope: !54)
+!65 = !DILocation(line: 10, column: 50, scope: !66)
+!66 = !DILexicalBlockFile(scope: !48, file: !1, discriminator: 3)
+!67 = !DILocation(line: 9, column: 30, scope: !44)
+!68 = !DILocation(line: 9, column: 3, scope: !44)
+!69 = !DILocation(line: 11, column: 10, scope: !9)
+!70 = !DILocation(line: 11, column: 14, scope: !9)
+!71 = !DILocation(line: 11, column: 3, scope: !9)
diff --git a/test/Transforms/SampleProfile/inline-coverage.ll b/test/Transforms/SampleProfile/inline-coverage.ll
new file mode 100644
index 000000000000..7248540b4f7c
--- /dev/null
+++ b/test/Transforms/SampleProfile/inline-coverage.ll
@@ -0,0 +1,135 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-coverage.prof -sample-profile-check-record-coverage=100 -sample-profile-check-sample-coverage=110 -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
+;
+; Original code:
+;
+; 1 #include <stdlib.h>
+; 2
+; 3 long long int foo(long i) {
+; 4 return rand() * i;
+; 5 }
+; 6
+; 7 int main() {
+; 8 long long int sum = 0;
+; 9 for (int i = 0; i < 200000 * 3000; i++)
+; 10 sum += foo(i);
+; 11 return sum > 0 ? 0 : 1;
+; 12 }
+;
+; CHECK: remark: coverage.cc:10:12: inlined hot callee '_Z3fool' with 172746 samples into 'main'
+; CHECK: remark: coverage.cc:9:19: Applied 23478 samples from profile (offset: 2.1)
+; CHECK: remark: coverage.cc:10:16: Applied 23478 samples from profile (offset: 3)
+; CHECK: remark: coverage.cc:4:10: Applied 31878 samples from profile (offset: 1)
+; CHECK: remark: coverage.cc:11:10: Applied 0 samples from profile (offset: 4)
+; CHECK: remark: coverage.cc:10:16: most popular destination for conditional branches at coverage.cc:9:3
+;
+; There is one sample record with 0 samples at offset 4 in main() that we never
+; use:
+; CHECK: warning: coverage.cc:7: 4 of 5 available profile records (80%) were applied
+;
+; Since the unused sample record contributes no samples, sample coverage should
+; be 100%. Note that we get this warning because we are requesting an impossible
+; 110% coverage check.
+; CHECK: warning: coverage.cc:7: 78834 of 78834 available profile samples (100%) were applied
+
+define i64 @_Z3fool(i64 %i) !dbg !4 {
+entry:
+ %i.addr = alloca i64, align 8
+ store i64 %i, i64* %i.addr, align 8
+ call void @llvm.dbg.declare(metadata i64* %i.addr, metadata !16, metadata !17), !dbg !18
+ %call = call i32 @rand(), !dbg !19
+ %conv = sext i32 %call to i64, !dbg !19
+ %0 = load i64, i64* %i.addr, align 8, !dbg !20
+ %mul = mul nsw i64 %conv, %0, !dbg !21
+ ret i64 %mul, !dbg !22
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+declare i32 @rand()
+
+define i32 @main() !dbg !9 {
+entry:
+ %retval = alloca i32, align 4
+ %sum = alloca i64, align 8
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ call void @llvm.dbg.declare(metadata i64* %sum, metadata !23, metadata !17), !dbg !24
+ store i64 0, i64* %sum, align 8, !dbg !24
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !25, metadata !17), !dbg !27
+ store i32 0, i32* %i, align 4, !dbg !27
+ br label %for.cond, !dbg !28
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4, !dbg !29
+ %cmp = icmp slt i32 %0, 600000000, !dbg !32
+ br i1 %cmp, label %for.body, label %for.end, !dbg !33
+
+for.body: ; preds = %for.cond
+ %1 = load i32, i32* %i, align 4, !dbg !34
+ %conv = sext i32 %1 to i64, !dbg !34
+ %call = call i64 @_Z3fool(i64 %conv), !dbg !35
+ %2 = load i64, i64* %sum, align 8, !dbg !36
+ %add = add nsw i64 %2, %call, !dbg !36
+ store i64 %add, i64* %sum, align 8, !dbg !36
+ br label %for.inc, !dbg !37
+
+for.inc: ; preds = %for.body
+ %3 = load i32, i32* %i, align 4, !dbg !38
+ %inc = add nsw i32 %3, 1, !dbg !38
+ store i32 %inc, i32* %i, align 4, !dbg !38
+ br label %for.cond, !dbg !39
+
+for.end: ; preds = %for.cond
+ %4 = load i64, i64* %sum, align 8, !dbg !40
+ %cmp1 = icmp sgt i64 %4, 0, !dbg !41
+ %cond = select i1 %cmp1, i32 0, i32 1, !dbg !40
+ ret i32 %cond, !dbg !42
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 251738) (llvm/trunk 251737)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "coverage.cc", directory: ".")
+!2 = !{}
+!3 = !{!4, !9}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !8}
+!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!8 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!10 = !DISubroutineType(types: !11)
+!11 = !{!12}
+!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.8.0 (trunk 251738) (llvm/trunk 251737)"}
+!16 = !DILocalVariable(name: "i", arg: 1, scope: !4, file: !1, line: 3, type: !8)
+!17 = !DIExpression()
+!18 = !DILocation(line: 3, column: 24, scope: !4)
+!19 = !DILocation(line: 4, column: 10, scope: !4)
+!20 = !DILocation(line: 4, column: 19, scope: !4)
+!21 = !DILocation(line: 4, column: 17, scope: !4)
+!22 = !DILocation(line: 4, column: 3, scope: !4)
+!23 = !DILocalVariable(name: "sum", scope: !9, file: !1, line: 8, type: !7)
+!24 = !DILocation(line: 8, column: 17, scope: !9)
+!25 = !DILocalVariable(name: "i", scope: !26, file: !1, line: 9, type: !12)
+!26 = distinct !DILexicalBlock(scope: !9, file: !1, line: 9, column: 3)
+!27 = !DILocation(line: 9, column: 12, scope: !26)
+!28 = !DILocation(line: 9, column: 8, scope: !26)
+!29 = !DILocation(line: 9, column: 19, scope: !30)
+!30 = !DILexicalBlockFile(scope: !31, file: !1, discriminator: 1)
+!31 = distinct !DILexicalBlock(scope: !26, file: !1, line: 9, column: 3)
+!32 = !DILocation(line: 9, column: 21, scope: !30)
+!33 = !DILocation(line: 9, column: 3, scope: !30)
+!34 = !DILocation(line: 10, column: 16, scope: !31)
+!35 = !DILocation(line: 10, column: 12, scope: !31)
+!36 = !DILocation(line: 10, column: 9, scope: !31)
+!37 = !DILocation(line: 10, column: 5, scope: !31)
+!38 = !DILocation(line: 9, column: 39, scope: !31)
+!39 = !DILocation(line: 9, column: 3, scope: !31)
+!40 = !DILocation(line: 11, column: 10, scope: !9)
+!41 = !DILocation(line: 11, column: 14, scope: !9)
+!42 = !DILocation(line: 11, column: 3, scope: !9)
diff --git a/test/Transforms/SampleProfile/inline-hint.ll b/test/Transforms/SampleProfile/inline-hint.ll
new file mode 100644
index 000000000000..16c4e64ec5bb
--- /dev/null
+++ b/test/Transforms/SampleProfile/inline-hint.ll
@@ -0,0 +1,38 @@
+; RUN: opt %s -sample-profile -sample-profile-file=%S/Inputs/inline-hint.prof -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
+;
+; CHECK: Applied cold hint to globally cold function '_Z7cold_fnRxi' with 0.1
+define void @_Z7cold_fnRxi() !dbg !4 {
+entry:
+ ret void, !dbg !29
+}
+
+; CHECK: Applied inline hint to globally hot function '_Z6hot_fnRxi' with 70.0
+define void @_Z6hot_fnRxi() #0 !dbg !10 {
+entry:
+ ret void, !dbg !38
+}
+
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19}
+
+!1 = !DIFile(filename: "inline-hint.cc", directory: ".")
+!2 = !{}
+!3 = !{!4, !10, !11, !14}
+!4 = distinct !DISubprogram(name: "cold_fn", linkageName: "_Z7cold_fnRxi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null, !7, !9}
+!7 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !8, size: 64, align: 64)
+!8 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = distinct !DISubprogram(name: "hot_fn", linkageName: "_Z6hot_fnRxi", scope: !1, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!11 = distinct !DISubprogram(name: "compute", linkageName: "_Z7computex", scope: !1, file: !1, line: 11, type: !12, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!12 = !DISubroutineType(types: !13)
+!13 = !{!8, !8}
+!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 21, type: !15, isLocal: false, isDefinition: true, scopeLine: 21, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!15 = !DISubroutineType(types: !16)
+!16 = !{!9}
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 2, !"Debug Info Version", i32 3}
+!19 = !{!"clang version 3.8.0 (trunk 254067) (llvm/trunk 254079)"}
+!29 = !DILocation(line: 5, column: 1, scope: !4)
+!38 = !DILocation(line: 9, column: 1, scope: !10)
diff --git a/test/Transforms/SampleProfile/inline.ll b/test/Transforms/SampleProfile/inline.ll
new file mode 100644
index 000000000000..590a20f9d1d1
--- /dev/null
+++ b/test/Transforms/SampleProfile/inline.ll
@@ -0,0 +1,108 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -sample-profile-inline-hot-threshold=1 -S | FileCheck %s
+
+; Original C++ test case
+;
+; #include <stdio.h>
+;
+; int sum(int x, int y) {
+; return x + y;
+; }
+;
+; int main() {
+; int s, i = 0;
+; while (i++ < 20000 * 20000)
+; if (i != 100) s = sum(i, s); else s = 30;
+; printf("sum is %d\n", s);
+; return 0;
+; }
+;
+@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ %0 = load i32, i32* %x.addr, align 4, !dbg !11
+ %1 = load i32, i32* %y.addr, align 4, !dbg !11
+ %add = add nsw i32 %0, %1, !dbg !11
+ ret i32 %add, !dbg !11
+}
+
+; Function Attrs: uwtable
+define i32 @main() !dbg !7 {
+entry:
+ %retval = alloca i32, align 4
+ %s = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ store i32 0, i32* %i, align 4, !dbg !12
+ br label %while.cond, !dbg !13
+
+while.cond: ; preds = %if.end, %entry
+ %0 = load i32, i32* %i, align 4, !dbg !14
+ %inc = add nsw i32 %0, 1, !dbg !14
+ store i32 %inc, i32* %i, align 4, !dbg !14
+ %cmp = icmp slt i32 %0, 400000000, !dbg !14
+ br i1 %cmp, label %while.body, label %while.end, !dbg !14
+
+while.body: ; preds = %while.cond
+ %1 = load i32, i32* %i, align 4, !dbg !16
+ %cmp1 = icmp ne i32 %1, 100, !dbg !16
+ br i1 %cmp1, label %if.then, label %if.else, !dbg !16
+
+
+if.then: ; preds = %while.body
+ %2 = load i32, i32* %i, align 4, !dbg !18
+ %3 = load i32, i32* %s, align 4, !dbg !18
+ %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18
+; CHECK-NOT: call i32 @_Z3sumii
+ store i32 %call, i32* %s, align 4, !dbg !18
+ br label %if.end, !dbg !18
+
+if.else: ; preds = %while.body
+ store i32 30, i32* %s, align 4, !dbg !20
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ br label %while.cond, !dbg !22
+
+while.end: ; preds = %while.cond
+ %4 = load i32, i32* %s, align 4, !dbg !24
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24
+ ret i32 0, !dbg !25
+}
+
+declare i32 @printf(i8*, ...) #2
+
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!3 = !{!4, !7}
+!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "calls.cc", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = !DILocation(line: 4, scope: !4)
+!12 = !DILocation(line: 8, scope: !7)
+!13 = !DILocation(line: 9, scope: !7)
+!14 = !DILocation(line: 9, scope: !15)
+!15 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !7)
+!16 = !DILocation(line: 10, scope: !17)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !19)
+!19 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !17)
+!20 = !DILocation(line: 10, scope: !21)
+!21 = !DILexicalBlockFile(discriminator: 2, file: !1, scope: !17)
+!22 = !DILocation(line: 10, scope: !23)
+!23 = !DILexicalBlockFile(discriminator: 3, file: !1, scope: !17)
+!24 = !DILocation(line: 11, scope: !7)
+!25 = !DILocation(line: 12, scope: !7)
diff --git a/test/Transforms/SampleProfile/nolocinfo.ll b/test/Transforms/SampleProfile/nolocinfo.ll
new file mode 100644
index 000000000000..08bca20984dd
--- /dev/null
+++ b/test/Transforms/SampleProfile/nolocinfo.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/nolocinfo.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s
+
+define i32 @foo(i32 %i) !dbg !4 {
+entry:
+ %i.addr = alloca i32, align 4
+ %0 = load i32, i32* %i.addr, align 4
+ %cmp = icmp sgt i32 %0, 1000
+
+; Remarks for conditional branches need debug location information for the
+; referring branch. When that is not present, the compiler should not abort.
+;
+; CHECK: remark: nolocinfo.c:3:5: most popular destination for conditional branches at <UNKNOWN LOCATION>
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ ret i32 0, !dbg !18
+
+if.end:
+ ret i32 1
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251335) (llvm/trunk 251344)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "nolocinfo.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 251335) (llvm/trunk 251344)"}
+!15 = distinct !DILexicalBlock(scope: !4, file: !1, line: 2, column: 7)
+!18 = !DILocation(line: 3, column: 5, scope: !15)
diff --git a/test/Transforms/SampleProfile/offset.ll b/test/Transforms/SampleProfile/offset.ll
new file mode 100644
index 000000000000..499b2826402d
--- /dev/null
+++ b/test/Transforms/SampleProfile/offset.ll
@@ -0,0 +1,82 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/offset.prof | opt -analyze -branch-prob | FileCheck %s
+
+; Original C++ code for this test case:
+;
+; a.cc:
+; #1
+; #2
+; #3
+; #4
+; #5 int foo(int a) {
+; #6 #include "a.b"
+; #7}
+;
+; a.b:
+; #1 if (a > 0) {
+; #2 return 10;
+; #3 } else {
+; #4 return 20;
+; #5 }
+
+; Function Attrs: nounwind uwtable
+define i32 @_Z3fooi(i32 %a) #0 !dbg !4 {
+entry:
+ %retval = alloca i32, align 4
+ %a.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !11, metadata !12), !dbg !13
+ %0 = load i32, i32* %a.addr, align 4, !dbg !14
+ %cmp = icmp sgt i32 %0, 0, !dbg !18
+ br i1 %cmp, label %if.then, label %if.else, !dbg !19
+; CHECK: edge entry -> if.then probability is 0x0147ae14 / 0x80000000 = 1.00%
+; CHECK: edge entry -> if.else probability is 0x7eb851ec / 0x80000000 = 99.00% [HOT edge]
+
+if.then: ; preds = %entry
+ store i32 10, i32* %retval, align 4, !dbg !20
+ br label %return, !dbg !20
+
+if.else: ; preds = %entry
+ store i32 20, i32* %retval, align 4, !dbg !22
+ br label %return, !dbg !22
+
+return: ; preds = %if.else, %if.then
+ %1 = load i32, i32* %retval, align 4, !dbg !24
+ ret i32 %1, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250750)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "a.cc", directory: "/tmp")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 250750)"}
+!11 = !DILocalVariable(name: "a", arg: 1, scope: !4, file: !1, line: 5, type: !7)
+!12 = !DIExpression()
+!13 = !DILocation(line: 5, column: 13, scope: !4)
+!14 = !DILocation(line: 1, column: 5, scope: !15)
+!15 = distinct !DILexicalBlock(scope: !17, file: !16, line: 1, column: 5)
+!16 = !DIFile(filename: "./a.b", directory: "/tmp")
+!17 = !DILexicalBlockFile(scope: !4, file: !16, discriminator: 0)
+!18 = !DILocation(line: 1, column: 7, scope: !15)
+!19 = !DILocation(line: 1, column: 5, scope: !17)
+!20 = !DILocation(line: 2, column: 3, scope: !21)
+!21 = distinct !DILexicalBlock(scope: !15, file: !16, line: 1, column: 12)
+!22 = !DILocation(line: 4, column: 3, scope: !23)
+!23 = distinct !DILexicalBlock(scope: !15, file: !16, line: 3, column: 8)
+!24 = !DILocation(line: 7, column: 1, scope: !25)
+!25 = !DILexicalBlockFile(scope: !4, file: !1, discriminator: 0)
diff --git a/test/Transforms/SampleProfile/propagate.ll b/test/Transforms/SampleProfile/propagate.ll
index 620d125875f7..eef7b162eb7a 100644
--- a/test/Transforms/SampleProfile/propagate.ll
+++ b/test/Transforms/SampleProfile/propagate.ll
@@ -40,7 +40,7 @@ target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [24 x i8] c"foo(%d, %d, %ld) = %ld\0A\00", align 1
; Function Attrs: nounwind uwtable
-define i64 @_Z3fooiil(i32 %x, i32 %y, i64 %N) #0 {
+define i64 @_Z3fooiil(i32 %x, i32 %y, i64 %N) #0 !dbg !4 {
entry:
%retval = alloca i64, align 8
%x.addr = alloca i32, align 4
@@ -73,8 +73,8 @@ for.cond: ; preds = %for.inc16, %if.else
%5 = load i64, i64* %N.addr, align 8, !dbg !15
%cmp1 = icmp slt i64 %4, %5, !dbg !15
br i1 %cmp1, label %for.body, label %for.end18, !dbg !15
-; CHECK: edge for.cond -> for.body probability is 10 / 10 = 100% [HOT edge]
-; CHECK: edge for.cond -> for.end18 probability is 0 / 10 = 0%
+; CHECK: edge for.cond -> for.body probability is 0x745d1746 / 0x80000000 = 90.91% [HOT edge]
+; CHECK: edge for.cond -> for.end18 probability is 0x0ba2e8ba / 0x80000000 = 9.09%
for.body: ; preds = %for.cond
%6 = load i64, i64* %i, align 8, !dbg !18
@@ -82,8 +82,8 @@ for.body: ; preds = %for.cond
%div = sdiv i64 %7, 3, !dbg !18
%cmp2 = icmp sgt i64 %6, %div, !dbg !18
br i1 %cmp2, label %if.then3, label %if.end, !dbg !18
-; CHECK: edge for.body -> if.then3 probability is 1 / 5 = 20%
-; CHECK: edge for.body -> if.end probability is 4 / 5 = 80%
+; CHECK: edge for.body -> if.then3 probability is 0x1999999a / 0x80000000 = 20.00%
+; CHECK: edge for.body -> if.end probability is 0x66666666 / 0x80000000 = 80.00%
if.then3: ; preds = %for.body
%8 = load i32, i32* %x.addr, align 4, !dbg !21
@@ -97,8 +97,8 @@ if.end: ; preds = %if.then3, %for.body
%div4 = sdiv i64 %10, 4, !dbg !22
%cmp5 = icmp sgt i64 %9, %div4, !dbg !22
br i1 %cmp5, label %if.then6, label %if.else7, !dbg !22
-; CHECK: edge if.end -> if.then6 probability is 3 / 6342 = 0.0473037%
-; CHECK: edge if.end -> if.else7 probability is 6339 / 6342 = 99.9527% [HOT edge]
+; CHECK: edge if.end -> if.then6 probability is 0x000f801f / 0x80000000 = 0.05%
+; CHECK: edge if.end -> if.else7 probability is 0x7ff07fe1 / 0x80000000 = 99.95% [HOT edge]
if.then6: ; preds = %if.end
%11 = load i32, i32* %y.addr, align 4, !dbg !24
@@ -119,8 +119,8 @@ for.cond8: ; preds = %for.inc, %if.else7
%14 = load i64, i64* %i, align 8, !dbg !28
%cmp10 = icmp slt i64 %conv9, %14, !dbg !28
br i1 %cmp10, label %for.body11, label %for.end, !dbg !28
-; CHECK: edge for.cond8 -> for.body11 probability is 16191 / 16191 = 100% [HOT edge]
-; CHECK: edge for.cond8 -> for.end probability is 0 / 16191 = 0%
+; CHECK: edge for.cond8 -> for.body11 probability is 0x5bfc7472 / 0x80000000 = 71.86%
+; CHECK: edge for.cond8 -> for.end probability is 0x24038b8e / 0x80000000 = 28.14%
for.body11: ; preds = %for.cond8
%15 = load i32, i32* %j, align 4, !dbg !31
@@ -167,7 +167,7 @@ return: ; preds = %if.end19, %if.then
}
; Function Attrs: uwtable
-define i32 @main() #1 {
+define i32 @main() #1 !dbg !7 {
entry:
%retval = alloca i32, align 4
%x = alloca i32, align 4
@@ -198,14 +198,14 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "propagate.cc", directory: ".")
!2 = !{}
!3 = !{!4, !7}
-!4 = !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, function: i64 (i32, i32, i64)* @_Z3fooiil, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "propagate.cc", directory: ".")
!6 = !DISubroutineType(types: !{null})
-!7 = !DISubprogram(name: "main", line: 24, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 24, file: !1, scope: !5, type: !6, function: i32 ()* @main, variables: !2)
+!7 = distinct !DISubprogram(name: "main", line: 24, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 24, file: !1, scope: !5, type: !6, variables: !2)
!8 = !{i32 2, !"Dwarf Version", i32 4}
!9 = !{i32 1, !"Debug Info Version", i32 3}
!10 = !{!"clang version 3.5 "}
diff --git a/test/Transforms/SampleProfile/remarks.ll b/test/Transforms/SampleProfile/remarks.ll
new file mode 100644
index 000000000000..a0e6a9deb8a8
--- /dev/null
+++ b/test/Transforms/SampleProfile/remarks.ll
@@ -0,0 +1,185 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/remarks.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s
+;
+; Original test case.
+;
+; 1 #include <stdlib.h>
+; 2
+; 3 long long foo() {
+; 4 long long int sum = 0;
+; 5 for (int i = 0; i < 500000000; i++)
+; 6 if (i < 1000)
+; 7 sum -= i;
+; 8 else
+; 9 sum += -i * rand();
+; 10 return sum;
+; 11 }
+; 12
+; 13 int main() { return foo() > 0; }
+
+; We are expecting foo() to be inlined in main() (almost all the cycles are
+; spent inside foo).
+; CHECK: remark: remarks.cc:13:21: inlined hot callee '_Z3foov' with 623868 samples into 'main'
+
+; The back edge for the loop is the hottest edge in the loop subgraph.
+; CHECK: remark: remarks.cc:6:9: most popular destination for conditional branches at remarks.cc:5:3
+
+; The predicate almost always chooses the 'else' branch.
+; CHECK: remark: remarks.cc:9:15: most popular destination for conditional branches at remarks.cc:6:9
+
+; Function Attrs: nounwind uwtable
+define i64 @_Z3foov() #0 !dbg !4 {
+entry:
+ %sum = alloca i64, align 8
+ %i = alloca i32, align 4
+ %0 = bitcast i64* %sum to i8*, !dbg !19
+ call void @llvm.lifetime.start(i64 8, i8* %0) #4, !dbg !19
+ call void @llvm.dbg.declare(metadata i64* %sum, metadata !9, metadata !20), !dbg !21
+ store i64 0, i64* %sum, align 8, !dbg !21, !tbaa !22
+ %1 = bitcast i32* %i to i8*, !dbg !26
+ call void @llvm.lifetime.start(i64 4, i8* %1) #4, !dbg !26
+ call void @llvm.dbg.declare(metadata i32* %i, metadata !10, metadata !20), !dbg !27
+ store i32 0, i32* %i, align 4, !dbg !27, !tbaa !28
+ br label %for.cond, !dbg !26
+
+for.cond: ; preds = %for.inc, %entry
+ %2 = load i32, i32* %i, align 4, !dbg !30, !tbaa !28
+ %cmp = icmp slt i32 %2, 500000000, !dbg !34
+ br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !35
+
+for.cond.cleanup: ; preds = %for.cond
+ %3 = bitcast i32* %i to i8*, !dbg !36
+ call void @llvm.lifetime.end(i64 4, i8* %3) #4, !dbg !36
+ br label %for.end
+
+for.body: ; preds = %for.cond
+ %4 = load i32, i32* %i, align 4, !dbg !38, !tbaa !28
+ %cmp1 = icmp slt i32 %4, 1000, !dbg !40
+ br i1 %cmp1, label %if.then, label %if.else, !dbg !41
+
+if.then: ; preds = %for.body
+ %5 = load i32, i32* %i, align 4, !dbg !42, !tbaa !28
+ %conv = sext i32 %5 to i64, !dbg !42
+ %6 = load i64, i64* %sum, align 8, !dbg !43, !tbaa !22
+ %sub = sub nsw i64 %6, %conv, !dbg !43
+ store i64 %sub, i64* %sum, align 8, !dbg !43, !tbaa !22
+ br label %if.end, !dbg !44
+
+if.else: ; preds = %for.body
+ %7 = load i32, i32* %i, align 4, !dbg !45, !tbaa !28
+ %sub2 = sub nsw i32 0, %7, !dbg !46
+ %call = call i32 @rand() #4, !dbg !47
+ %mul = mul nsw i32 %sub2, %call, !dbg !48
+ %conv3 = sext i32 %mul to i64, !dbg !46
+ %8 = load i64, i64* %sum, align 8, !dbg !49, !tbaa !22
+ %add = add nsw i64 %8, %conv3, !dbg !49
+ store i64 %add, i64* %sum, align 8, !dbg !49, !tbaa !22
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ br label %for.inc, !dbg !50
+
+for.inc: ; preds = %if.end
+ %9 = load i32, i32* %i, align 4, !dbg !51, !tbaa !28
+ %inc = add nsw i32 %9, 1, !dbg !51
+ store i32 %inc, i32* %i, align 4, !dbg !51, !tbaa !28
+ br label %for.cond, !dbg !52
+
+for.end: ; preds = %for.cond.cleanup
+ %10 = load i64, i64* %sum, align 8, !dbg !53, !tbaa !22
+ %11 = bitcast i64* %sum to i8*, !dbg !54
+ call void @llvm.lifetime.end(i64 8, i8* %11) #4, !dbg !54
+ ret i64 %10, !dbg !55
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
+
+; Function Attrs: nounwind
+declare i32 @rand() #3
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 !dbg !13 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ %call = call i64 @_Z3foov(), !dbg !56
+ %cmp = icmp sgt i64 %call, 0, !dbg !57
+ %conv = zext i1 %cmp to i32, !dbg !56
+ ret i32 %conv, !dbg !58
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind argmemonly }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!16, !17}
+!llvm.ident = !{!18}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 251041) (llvm/trunk 251053)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "remarks.cc", directory: ".")
+!2 = !{}
+!3 = !{!4, !13}
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!8 = !{!9, !10}
+!9 = !DILocalVariable(name: "sum", scope: !4, file: !1, line: 4, type: !7)
+!10 = !DILocalVariable(name: "i", scope: !11, file: !1, line: 5, type: !12)
+!11 = distinct !DILexicalBlock(scope: !4, file: !1, line: 5, column: 3)
+!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 13, type: !14, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: true, variables: !2)
+!14 = !DISubroutineType(types: !15)
+!15 = !{!12}
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 2, !"Debug Info Version", i32 3}
+!18 = !{!"clang version 3.8.0 (trunk 251041) (llvm/trunk 251053)"}
+!19 = !DILocation(line: 4, column: 3, scope: !4)
+!20 = !DIExpression()
+!21 = !DILocation(line: 4, column: 17, scope: !4)
+!22 = !{!23, !23, i64 0}
+!23 = !{!"long long", !24, i64 0}
+!24 = !{!"omnipotent char", !25, i64 0}
+!25 = !{!"Simple C/C++ TBAA"}
+!26 = !DILocation(line: 5, column: 8, scope: !11)
+!27 = !DILocation(line: 5, column: 12, scope: !11)
+!28 = !{!29, !29, i64 0}
+!29 = !{!"int", !24, i64 0}
+!30 = !DILocation(line: 5, column: 19, scope: !31)
+!31 = !DILexicalBlockFile(scope: !32, file: !1, discriminator: 3)
+!32 = !DILexicalBlockFile(scope: !33, file: !1, discriminator: 1)
+!33 = distinct !DILexicalBlock(scope: !11, file: !1, line: 5, column: 3)
+!34 = !DILocation(line: 5, column: 21, scope: !33)
+!35 = !DILocation(line: 5, column: 3, scope: !11)
+!36 = !DILocation(line: 5, column: 3, scope: !37)
+!37 = !DILexicalBlockFile(scope: !33, file: !1, discriminator: 2)
+!38 = !DILocation(line: 6, column: 9, scope: !39)
+!39 = distinct !DILexicalBlock(scope: !33, file: !1, line: 6, column: 9)
+!40 = !DILocation(line: 6, column: 11, scope: !39)
+!41 = !DILocation(line: 6, column: 9, scope: !33)
+!42 = !DILocation(line: 7, column: 14, scope: !39)
+!43 = !DILocation(line: 7, column: 11, scope: !39)
+!44 = !DILocation(line: 7, column: 7, scope: !39)
+!45 = !DILocation(line: 9, column: 15, scope: !39)
+!46 = !DILocation(line: 9, column: 14, scope: !39)
+!47 = !DILocation(line: 9, column: 19, scope: !39)
+!48 = !DILocation(line: 9, column: 17, scope: !39)
+!49 = !DILocation(line: 9, column: 11, scope: !39)
+!50 = !DILocation(line: 6, column: 13, scope: !39)
+!51 = !DILocation(line: 5, column: 35, scope: !33)
+!52 = !DILocation(line: 5, column: 3, scope: !33)
+!53 = !DILocation(line: 10, column: 10, scope: !4)
+!54 = !DILocation(line: 11, column: 1, scope: !4)
+!55 = !DILocation(line: 10, column: 3, scope: !4)
+!56 = !DILocation(line: 13, column: 21, scope: !13)
+!57 = !DILocation(line: 13, column: 27, scope: !13)
+!58 = !DILocation(line: 13, column: 14, scope: !13)
diff --git a/test/Transforms/SampleProfile/syntax.ll b/test/Transforms/SampleProfile/syntax.ll
index ed38a175288a..debbc7c87ddb 100644
--- a/test/Transforms/SampleProfile/syntax.ll
+++ b/test/Transforms/SampleProfile/syntax.ll
@@ -13,7 +13,7 @@ entry:
}
; NO-DEBUG: warning: No debug information found in function empty: Function profile not used
; MISSING-FILE: missing.prof: Could not open profile:
-; BAD-FN-HEADER: error: {{.*}}bad_fn_header.prof:1: Expected 'mangled_name:NUM:NUM', found 3empty:100:BAD
+; BAD-FN-HEADER: error: {{.*}}bad_fn_header.prof: Could not open profile: Unrecognized sample profile encoding format
; BAD-SAMPLE-LINE: error: {{.*}}bad_sample_line.prof:3: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1: BAD
; BAD-LINE-VALUES: error: {{.*}}bad_line_values.prof:2: Expected 'mangled_name:NUM:NUM', found -1: 10
; BAD-DISCRIMINATOR-VALUE: error: {{.*}}bad_discriminator_value.prof:2: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1.-3: 10
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
index 8e5d4ff773dd..4daa610ccdcb 100644
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.6.0"
; CHECK: llvm.dbg.value
; CHECK: llvm.dbg.value
-define i32 @f(i32 %a, i32 %b) nounwind ssp {
+define i32 @f(i32 %a, i32 %b) nounwind ssp !dbg !1 {
entry:
%a.addr = alloca i32, align 4
%b.addr = alloca i32, align 4
@@ -42,17 +42,17 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!20}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 131941)", isOptimized: false, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19, subprograms: !17)
-!1 = !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !18, scope: !2, type: !3, function: i32 (i32, i32)* @f)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 131941)", isOptimized: false, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19, subprograms: !17)
+!1 = distinct !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !18, scope: !2, type: !3)
!2 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b")
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 1, arg: 1, scope: !1, file: !2, type: !5)
+!6 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !1, file: !2, type: !5)
!7 = !DILocation(line: 1, column: 11, scope: !1)
-!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 1, arg: 2, scope: !1, file: !2, type: !5)
+!8 = !DILocalVariable(name: "b", line: 1, arg: 2, scope: !1, file: !2, type: !5)
!9 = !DILocation(line: 1, column: 18, scope: !1)
-!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 2, scope: !11, file: !2, type: !5)
+!10 = !DILocalVariable(name: "c", line: 2, scope: !11, file: !2, type: !5)
!11 = distinct !DILexicalBlock(line: 1, column: 21, file: !18, scope: !1)
!12 = !DILocation(line: 2, column: 9, scope: !11)
!13 = !DILocation(line: 2, column: 14, scope: !11)
diff --git a/test/Transforms/Scalarizer/dbginfo.ll b/test/Transforms/Scalarizer/dbginfo.ll
index 3770a3e8c642..09252a09d4b4 100644
--- a/test/Transforms/Scalarizer/dbginfo.ll
+++ b/test/Transforms/Scalarizer/dbginfo.ll
@@ -2,7 +2,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Function Attrs: nounwind uwtable
-define void @f1(<4 x i32>* nocapture %a, <4 x i32>* nocapture readonly %b, <4 x i32>* nocapture readonly %c) #0 {
+define void @f1(<4 x i32>* nocapture %a, <4 x i32>* nocapture readonly %b, <4 x i32>* nocapture readonly %c) #0 !dbg !4 {
; CHECK: @f1(
; CHECK: %a.i0 = bitcast <4 x i32>* %a to i32*
; CHECK: %a.i1 = getelementptr i32, i32* %a.i0, i32 1
@@ -57,11 +57,11 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!18, !26}
!llvm.ident = !{!19}
-!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 194134) (llvm/trunk 194126)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 194134) (llvm/trunk 194126)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "/tmp/add.c", directory: "/home/richards/llvm/build")
!2 = !{}
!3 = !{!4}
-!4 = !DISubprogram(name: "f1", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, function: void (<4 x i32>*, <4 x i32>*, <4 x i32>*)* @f1, variables: !14)
+!4 = distinct !DISubprogram(name: "f1", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !14)
!5 = !DIFile(filename: "/tmp/add.c", directory: "/home/richards/llvm/build")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8, !8, !8}
@@ -72,9 +72,9 @@ attributes #1 = { nounwind readnone }
!12 = !{!13}
!13 = !DISubrange(count: 4)
!14 = !{!15, !16, !17}
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 3, arg: 1, scope: !4, file: !5, type: !8)
-!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 3, arg: 2, scope: !4, file: !5, type: !8)
-!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 3, arg: 3, scope: !4, file: !5, type: !8)
+!15 = !DILocalVariable(name: "a", line: 3, arg: 1, scope: !4, file: !5, type: !8)
+!16 = !DILocalVariable(name: "b", line: 3, arg: 2, scope: !4, file: !5, type: !8)
+!17 = !DILocalVariable(name: "c", line: 3, arg: 3, scope: !4, file: !5, type: !8)
!18 = !{i32 2, !"Dwarf Version", i32 4}
!19 = !{!"clang version 3.4 (trunk 194134) (llvm/trunk 194126)"}
!20 = !DILocation(line: 3, scope: !4)
diff --git a/test/Transforms/Scalarizer/store-bug.ll b/test/Transforms/Scalarizer/store-bug.ll
new file mode 100644
index 000000000000..84c2b3f840a0
--- /dev/null
+++ b/test/Transforms/Scalarizer/store-bug.ll
@@ -0,0 +1,25 @@
+; RUN: opt -scalarizer -scalarize-load-store -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; This input caused the scalarizer not to clear cached results
+; properly.
+;
+; Any regressions should trigger an assert in the scalarizer.
+
+define void @func(<4 x float> %val, <4 x float> *%ptr) {
+ store <4 x float> %val, <4 x float> *%ptr
+ ret void
+; CHECK: store float %val.i0, float* %ptr.i0, align 16
+; CHECK: store float %val.i1, float* %ptr.i1, align 4
+; CHECK: store float %val.i2, float* %ptr.i2, align 8
+; CHECK: store float %val.i3, float* %ptr.i3, align 4
+}
+
+define void @func.copy(<4 x float> %val, <4 x float> *%ptr) {
+ store <4 x float> %val, <4 x float> *%ptr
+ ret void
+; CHECK: store float %val.i0, float* %ptr.i0, align 16
+; CHECK: store float %val.i1, float* %ptr.i1, align 4
+; CHECK: store float %val.i2, float* %ptr.i2, align 8
+; CHECK: store float %val.i3, float* %ptr.i3, align 4
+}
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
index 527634db0f5b..6f117697dded 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
@@ -6,9 +6,9 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:
; IR-LABEL: @sum_of_array(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 33
define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
%tmp = sext i32 %y to i64
%tmp1 = sext i32 %x to i64
@@ -38,7 +38,7 @@ define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output)
; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(2)* [[BASE_PTR]], i64 255
+; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 255
; IR: add i32 %x, 256
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
@@ -71,9 +71,9 @@ define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(
; DS instructions have a larger immediate offset, so make sure these are OK.
; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 255
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16128
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i32 16383
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 255
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16128
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16383
define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) {
%tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y
%tmp4 = load float, float addrspace(3)* %tmp2, align 4
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 073313d40e77..e7b3545839c3 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -52,9 +52,9 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
; IR-LABEL: @sum_of_array(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
; @sum_of_array2 is very similar to @sum_of_array. The only difference is in
; the order of "sext" and "add" when computing the array indices. @sum_of_array
@@ -95,9 +95,9 @@ define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {
; IR-LABEL: @sum_of_array2(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
; This function loads
@@ -145,9 +145,9 @@ define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
; IR-LABEL: @sum_of_array3(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
; This function loads
@@ -191,6 +191,44 @@ define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) {
; IR-LABEL: @sum_of_array4(
; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 1
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 32
-; IR: getelementptr float, float addrspace(3)* [[BASE_PTR]], i64 33
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
+; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
+
+
+; The source code is:
+; p0 = &input[sext(x + y)];
+; p1 = &input[sext(x + (y + 5))];
+;
+; Without reuniting extensions, SeparateConstOffsetFromGEP would emit
+; p0 = &input[sext(x + y)];
+; t1 = &input[sext(x) + sext(y)];
+; p1 = &t1[5];
+;
+; With reuniting extensions, it merges p0 and t1 and thus emits
+; p0 = &input[sext(x + y)];
+; p1 = &p0[5];
+define void @reunion(i32 %x, i32 %y, float* %input) {
+; IR-LABEL: @reunion(
+; PTX-LABEL: reunion(
+entry:
+ %xy = add nsw i32 %x, %y
+ %0 = sext i32 %xy to i64
+ %p0 = getelementptr inbounds float, float* %input, i64 %0
+ %v0 = load float, float* %p0, align 4
+; PTX: ld.f32 %f{{[0-9]+}}, {{\[}}[[p0:%rd[0-9]+]]{{\]}}
+ call void @use(float %v0)
+
+ %y5 = add nsw i32 %y, 5
+ %xy5 = add nsw i32 %x, %y5
+ %1 = sext i32 %xy5 to i64
+ %p1 = getelementptr inbounds float, float* %input, i64 %1
+; IR: getelementptr inbounds float, float* %p0, i64 5
+ %v1 = load float, float* %p1, align 4
+; PTX: ld.f32 %f{{[0-9]+}}, {{\[}}[[p0]]+20{{\]}}
+ call void @use(float %v1)
+
+ ret void
+}
+
+declare void @use(float)
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index 2fdd158a35ed..eeeac1963741 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -44,7 +44,7 @@ entry:
; CHECK: add i32 %j, -2
; CHECK: sext
; CHECK: getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 32
+; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 32
; We should be able to trace into sext/zext if it can be distributed to both
; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b)
@@ -65,7 +65,7 @@ define float* @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) {
}
; CHECK-LABEL: @ext_add_no_overflow(
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 33
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 33
; Verifies we handle nested sext/zext correctly.
define void @sext_zext(i32 %a, i32 %b, float** %out1, float** %out2) {
@@ -110,7 +110,7 @@ entry:
}
; CHECK-LABEL: @sext_or(
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 32
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 32
; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b +
; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't
@@ -125,7 +125,7 @@ entry:
}
; CHECK-LABEL: @expr(
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 0
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 160
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 160
; CHECK: store i64 %b5, i64* %out
; d + sext(a +nsw (b +nsw (c +nsw 8))) => (d + sext(a) + sext(b) + sext(c)) + 8
@@ -143,7 +143,7 @@ entry:
; CHECK: sext i32
; CHECK: sext i32
; CHECK: sext i32
-; CHECK: getelementptr float, float* %{{[a-zA-Z0-9]+}}, i64 8
+; CHECK: getelementptr inbounds float, float* %{{[a-zA-Z0-9]+}}, i64 8
; Verifies we handle "sub" correctly.
define float* @sub(i64 %i, i64 %j) {
@@ -155,7 +155,7 @@ define float* @sub(i64 %i, i64 %j) {
; CHECK-LABEL: @sub(
; CHECK: %[[j2:[a-zA-Z0-9]+]] = sub i64 0, %j
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]]* @float_2d_array, i64 0, i64 %i, i64 %[[j2]]
-; CHECK: getelementptr float, float* [[BASE_PTR]], i64 -155
+; CHECK: getelementptr inbounds float, float* [[BASE_PTR]], i64 -155
%struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed
@@ -173,7 +173,7 @@ entry:
; CHECK-LABEL: @packed_struct(
; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [1024 x %struct.Packed], [1024 x %struct.Packed]* %s, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1, i64 %{{[a-zA-Z0-9]+}}
; CHECK: [[CASTED_PTR:%[a-zA-Z0-9]+]] = bitcast i64* [[BASE_PTR]] to i8*
-; CHECK: %uglygep = getelementptr i8, i8* [[CASTED_PTR]], i64 100
+; CHECK: %uglygep = getelementptr inbounds i8, i8* [[CASTED_PTR]], i64 100
; CHECK: bitcast i8* %uglygep to i64*
; We shouldn't be able to extract the 8 from "zext(a +nuw (b + 8))",
@@ -272,7 +272,7 @@ entry:
%ptr2 = getelementptr inbounds %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8*
-; CHECK: getelementptr i8, i8* [[PTR1]], i64 -64
+; CHECK: getelementptr inbounds i8, i8* [[PTR1]], i64 -64
; CHECK: bitcast
ret %struct2* %ptr2
; CHECK-NEXT: ret
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
index d1a0f33d5a21..601ca5291353 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
@@ -25,7 +25,7 @@ then:
%or = or i64 %i, 3
%p = getelementptr inbounds float, float* %input, i64 %or
; CHECK: [[base:[^ ]+]] = getelementptr float, float* %input, i64 %i
-; CHECK: getelementptr float, float* [[base]], i64 3
+; CHECK: getelementptr inbounds float, float* [[base]], i64 3
ret float* %p
exit:
diff --git a/test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll b/test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll
new file mode 100644
index 000000000000..e32d711143dc
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -simplifycfg -mtriple=aarch64 < %s | FileCheck %s
+
+define i32 @ctlz(i32 %A) {
+; CHECK-LABEL: @ctlz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+define i32 @cttz(i32 %A) {
+; CHECK-LABEL: @cttz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+ %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+
diff --git a/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll b/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll
new file mode 100644
index 000000000000..ffcf2175091f
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -simplifycfg -mtriple=arm -mattr=+v6t2 < %s | FileCheck %s
+
+define i32 @ctlz(i32 %A) {
+; CHECK-LABEL: @ctlz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+define i32 @cttz(i32 %A) {
+; CHECK-LABEL: @cttz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+ %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+
diff --git a/test/Transforms/SimplifyCFG/ARM/lit.local.cfg b/test/Transforms/SimplifyCFG/ARM/lit.local.cfg
new file mode 100644
index 000000000000..5a3b8565213d
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/ARM/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+ config.unsupported = True
diff --git a/test/Transforms/SimplifyCFG/Mips/cttz-ctlz.ll b/test/Transforms/SimplifyCFG/Mips/cttz-ctlz.ll
new file mode 100644
index 000000000000..b4bfb51dd142
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/Mips/cttz-ctlz.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -simplifycfg -mtriple=mips-linux-gnu < %s | FileCheck %s
+
+define i32 @ctlz(i32 %A) {
+; CHECK-LABEL: @ctlz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+ %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+define i32 @cttz(i32 %A) {
+; CHECK-LABEL: @cttz(
+; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
+; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]]
+; CHECK-NEXT: ret i32 [[SEL]]
+entry:
+ %tobool = icmp eq i32 %A, 0
+ br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:
+ %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+ ret i32 %cond
+}
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+
diff --git a/test/Transforms/SimplifyCFG/Mips/lit.local.cfg b/test/Transforms/SimplifyCFG/Mips/lit.local.cfg
new file mode 100644
index 000000000000..683bfdccb742
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/Mips/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Mips' in targets:
+ config.unsupported = True
diff --git a/test/Transforms/SimplifyCFG/PR25267.ll b/test/Transforms/SimplifyCFG/PR25267.ll
new file mode 100644
index 000000000000..a13d45a0f271
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PR25267.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+define void @f() {
+entry:
+ br label %for.cond
+
+for.cond:
+ %phi = phi i1 [ false, %entry ], [ true, %for.body ]
+ %select = select i1 %phi, i32 1, i32 2
+ br label %for.body
+
+for.body:
+ switch i32 %select, label %for.cond [
+ i32 1, label %return
+ i32 2, label %for.body
+ ]
+
+return:
+ ret void
+}
+
+; CHECK-LABEL: define void @f(
+; CHECK: br label %[[LABEL:.*]]
+; CHECK: br label %[[LABEL]]
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index c23a96df52ee..73f9a0f88aca 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -69,3 +69,29 @@ end:
ret i8* %x10
}
+
+define i32* @test5(i32 %a, i32 %b, i32 %c, i32* dereferenceable(10) %ptr1,
+ i32* dereferenceable(10) %ptr2, i32** dereferenceable(10) %ptr3) nounwind {
+; CHECK-LABEL: @test5(
+entry:
+ %tmp1 = icmp eq i32 %b, 0
+ br i1 %tmp1, label %bb1, label %bb3
+
+bb1: ; preds = %entry
+ %tmp2 = icmp sgt i32 %c, 1
+ br i1 %tmp2, label %bb2, label %bb3
+; CHECK: bb1:
+; CHECK-NEXT: icmp sgt i32 %c, 1
+; CHECK-NEXT: load i32*, i32** %ptr3
+; CHECK-NOT: dereferenceable
+; CHECK-NEXT: select i1 %tmp2, i32* %tmp3, i32* %ptr2
+; CHECK-NEXT: ret i32* %tmp3.ptr2
+
+bb2: ; preds = %bb1
+ %tmp3 = load i32*, i32** %ptr3, !dereferenceable !{i64 10}
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb1, %entry
+ %tmp4 = phi i32* [ %ptr1, %entry ], [ %ptr2, %bb1 ], [ %tmp3, %bb2 ]
+ ret i32* %tmp4
+}
diff --git a/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll b/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
index 69f6c69059d4..bee80e6acce0 100644
--- a/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
+++ b/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll
@@ -7,9 +7,7 @@ define i64 @test1(i64 %A) {
; ALL-LABEL: @test1(
; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-; LZCNT-NEXT: select i1 [[COND]], i64 64, i64 [[CTLZ]]
-; BMI-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTLZ]]
; ALL: ret
entry:
%tobool = icmp eq i64 %A, 0
@@ -28,9 +26,7 @@ define i32 @test2(i32 %A) {
; ALL-LABEL: @test2(
; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-; LZCNT-NEXT: select i1 [[COND]], i32 32, i32 [[CTLZ]]
-; BMI-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTLZ]]
; ALL: ret
entry:
%tobool = icmp eq i32 %A, 0
@@ -50,9 +46,7 @@ define signext i16 @test3(i16 signext %A) {
; ALL-LABEL: @test3(
; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-; LZCNT-NEXT: select i1 [[COND]], i16 16, i16 [[CTLZ]]
-; BMI-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTLZ]]
; ALL: ret
entry:
%tobool = icmp eq i16 %A, 0
@@ -72,9 +66,7 @@ define i64 @test1b(i64 %A) {
; ALL-LABEL: @test1b(
; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0
; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
-; BMI-NEXT: select i1 [[COND]], i64 64, i64 [[CTTZ]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTTZ]]
; ALL: ret
entry:
%tobool = icmp eq i64 %A, 0
@@ -94,9 +86,7 @@ define i32 @test2b(i32 %A) {
; ALL-LABEL: @test2b(
; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0
; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
-; BMI-NEXT: select i1 [[COND]], i32 32, i32 [[CTTZ]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTTZ]]
; ALL: ret
entry:
%tobool = icmp eq i32 %A, 0
@@ -116,9 +106,7 @@ define signext i16 @test3b(i16 signext %A) {
; ALL-LABEL: @test3b(
; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0
; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
-; BMI-NEXT: select i1 [[COND]], i16 16, i16 [[CTTZ]]
-; LZCNT-NOT: select
-; GENERIC-NOT: select
+; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTTZ]]
; ALL: ret
entry:
%tobool = icmp eq i16 %A, 0
diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll
index d228499b2ec5..d4a9c81e506d 100644
--- a/test/Transforms/SimplifyCFG/basictest.ll
+++ b/test/Transforms/SimplifyCFG/basictest.ll
@@ -50,7 +50,7 @@ define i8 @test6f() {
; CHECK: alloca i8, align 1
; CHECK-NEXT: call i8 @test6g
; CHECK-NEXT: icmp eq i8 %tmp, 0
-; CHECK-NEXT: load i8, i8* %r, align 1{{$}}
+; CHECK-NEXT: load i8, i8* %r, align 1, !dbg !{{[0-9]+$}}
bb0:
%r = alloca i8, align 1
@@ -58,7 +58,7 @@ bb0:
%tmp1 = icmp eq i8 %tmp, 0
br i1 %tmp1, label %bb2, label %bb1
bb1:
- %tmp3 = load i8, i8* %r, align 1, !range !2, !tbaa !1
+ %tmp3 = load i8, i8* %r, align 1, !range !2, !tbaa !1, !dbg !5
%tmp4 = icmp eq i8 %tmp3, 1
br i1 %tmp4, label %bb2, label %bb3
bb2:
@@ -69,6 +69,16 @@ bb3:
}
declare i8 @test6g(i8*)
+!llvm.dbg.cu = !{!3}
+!llvm.module.flags = !{!8, !9}
+
!0 = !{!1, !1, i64 0}
!1 = !{!"foo"}
!2 = !{i8 0, i8 2}
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, file: !7, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !4, subprograms: !4, globals: !4)
+!4 = !{}
+!5 = !DILocation(line: 23, scope: !6)
+!6 = distinct !DISubprogram(name: "foo", scope: !3, file: !7, line: 1, type: !DISubroutineType(types: !4), isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !4)
+!7 = !DIFile(filename: "foo.c", directory: "/")
+!8 = !{i32 2, !"Dwarf Version", i32 2}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
index fac5b186e89d..34871063bbcc 100644
--- a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
@@ -4,7 +4,7 @@
@0 = external hidden constant [5 x %0], align 4
-define void @foo(i32) nounwind ssp {
+define void @foo(i32) nounwind ssp !dbg !0 {
Entry:
%1 = icmp slt i32 %0, 0, !dbg !5
br i1 %1, label %BB5, label %BB1, !dbg !5
@@ -41,14 +41,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.sp = !{!0}
-!0 = !DISubprogram(name: "foo", line: 231, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !15, scope: !1, type: !3, function: void (i32)* @foo)
+!0 = distinct !DISubprogram(name: "foo", line: 231, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !15, scope: !1, type: !3)
!1 = !DIFile(filename: "a.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang (trunk 129006)", isOptimized: true, emissionKind: 0, file: !15, enums: !4, retainedTypes: !4)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang (trunk 129006)", isOptimized: true, emissionKind: 0, file: !15, enums: !4, retainedTypes: !4)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !DILocation(line: 131, column: 2, scope: !0)
!6 = !DILocation(line: 134, column: 2, scope: !0)
-!7 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "bar", line: 232, scope: !8, file: !1, type: !9)
+!7 = !DILocalVariable(name: "bar", line: 232, scope: !8, file: !1, type: !9)
!8 = distinct !DILexicalBlock(line: 231, column: 1, file: !15, scope: !0)
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !10)
!10 = !DIDerivedType(tag: DW_TAG_const_type, scope: !2, baseType: !11)
diff --git a/test/Transforms/SimplifyCFG/empty-cleanuppad.ll b/test/Transforms/SimplifyCFG/empty-cleanuppad.ll
new file mode 100644
index 000000000000..57b362889955
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/empty-cleanuppad.ll
@@ -0,0 +1,415 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; ModuleID = 'cppeh-simplify.cpp'
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+
+; This case arises when two objects with empty destructors are cleaned up.
+;
+; void f1() {
+; S a;
+; S b;
+; g();
+; }
+;
+; In this case, both cleanup pads can be eliminated and the invoke can be
+; converted to a call.
+;
+; CHECK: define void @f1()
+; CHECK: entry:
+; CHECK: call void @g()
+; CHECK: ret void
+; CHECK-NOT: cleanuppad
+; CHECK: }
+;
+define void @f1() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @g() to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ ret void
+
+ehcleanup: ; preds = %entry
+ %0 = cleanuppad within none []
+ cleanupret from %0 unwind label %ehcleanup.1
+
+ehcleanup.1: ; preds = %ehcleanup
+ %1 = cleanuppad within none []
+ cleanupret from %1 unwind to caller
+}
+
+
+; This case arises when an object with an empty destructor must be cleaned up
+; outside of a try-block and an object with a non-empty destructor must be
+; cleaned up within the try-block.
+;
+; void f2() {
+; S a;
+; try {
+; S2 b;
+; g();
+; } catch (...) {}
+; }
+;
+; In this case, the outermost cleanup pad can be eliminated and the catch block
+; should unwind to the caller (that is, exception handling continues with the
+; parent frame of the caller).
+;
+; CHECK: define void @f2()
+; CHECK: entry:
+; CHECK: invoke void @g()
+; CHECK: ehcleanup:
+; CHECK: cleanuppad within none
+; CHECK: call void @"\01??1S2@@QEAA@XZ"(%struct.S2* %b)
+; CHECK: cleanupret from %0 unwind label %catch.dispatch
+; CHECK: catch.dispatch:
+; CHECK: catchswitch within none [label %catch] unwind to caller
+; CHECK: catch:
+; CHECK: catchpad
+; CHECK: catchret
+; CHECK-NOT: cleanuppad
+; CHECK: }
+;
+define void @f2() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %b = alloca %struct.S2, align 1
+ invoke void @g() to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ br label %try.cont
+
+ehcleanup: ; preds = %entry
+ %0 = cleanuppad within none []
+ call void @"\01??1S2@@QEAA@XZ"(%struct.S2* %b)
+ cleanupret from %0 unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %ehcleanup
+ %cs1 = catchswitch within none [label %catch] unwind label %ehcleanup.1
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ catchret from %1 to label %catchret.dest
+
+catchret.dest: ; preds = %catch
+ br label %try.cont
+
+try.cont: ; preds = %catchret.dest, %invoke.cont
+ ret void
+
+ehcleanup.1:
+ %2 = cleanuppad within none []
+ cleanupret from %2 unwind to caller
+}
+
+
+; This case arises when an object with a non-empty destructor must be cleaned up
+; outside of a try-block and an object with an empty destructor must be cleaned
+; up within the try-block.
+;
+; void f3() {
+; S2 a;
+; try {
+; S b;
+; g();
+; } catch (...) {}
+; }
+;
+; In this case the inner cleanup pad should be eliminated and the invoke of g()
+; should unwind directly to the catchpad.
+;
+; CHECK-LABEL: define void @f3()
+; CHECK: entry:
+; CHECK: invoke void @g()
+; CHECK: to label %try.cont unwind label %catch.dispatch
+; CHECK: catch.dispatch:
+; CHECK-NEXT: catchswitch within none [label %catch] unwind label %ehcleanup.1
+; CHECK: catch:
+; CHECK: catchpad within %cs1 [i8* null, i32 64, i8* null]
+; CHECK: catchret
+; CHECK: ehcleanup.1:
+; CHECK: cleanuppad
+; CHECK: call void @"\01??1S2@@QEAA@XZ"(%struct.S2* %a)
+; CHECK: cleanupret from %cp3 unwind to caller
+; CHECK: }
+;
+define void @f3() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ %a = alloca %struct.S2, align 1
+ invoke void @g() to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ br label %try.cont
+
+ehcleanup: ; preds = %entry
+ %0 = cleanuppad within none []
+ cleanupret from %0 unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %ehcleanup
+ %cs1 = catchswitch within none [label %catch] unwind label %ehcleanup.1
+
+catch: ; preds = %catch.dispatch
+ %cp2 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ catchret from %cp2 to label %catchret.dest
+
+catchret.dest: ; preds = %catch
+ br label %try.cont
+
+try.cont: ; preds = %catchret.dest, %invoke.cont
+ ret void
+
+ehcleanup.1:
+ %cp3 = cleanuppad within none []
+ call void @"\01??1S2@@QEAA@XZ"(%struct.S2* %a)
+ cleanupret from %cp3 unwind to caller
+}
+
+
+; This case arises when an object with an empty destructor may require cleanup
+; from either inside or outside of a try-block.
+;
+; void f4() {
+; S a;
+; g();
+; try {
+; g();
+; } catch (...) {}
+; }
+;
+; In this case, the cleanuppad should be eliminated, the invoke outside of the
+; catch block should be converted to a call (that is, exception handling
+; continues with the parent frame of the caller).
+;
+; CHECK-LABEL: define void @f4()
+; CHECK: entry:
+; CHECK: call void @g
+; Note: The cleanuppad simplification will insert an unconditional branch here
+; but it will be eliminated, placing the following invoke in the entry BB.
+; CHECK: invoke void @g()
+; CHECK: to label %try.cont unwind label %catch.dispatch
+; CHECK: catch.dispatch:
+; CHECK: catchswitch within none [label %catch] unwind to caller
+; CHECK: catch:
+; CHECK: catchpad
+; CHECK: catchret
+; CHECK-NOT: cleanuppad
+; CHECK: }
+;
+define void @f4() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @g()
+ to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ invoke void @g()
+ to label %try.cont unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %invoke.cont
+ %cs1 = catchswitch within none [label %catch] unwind label %ehcleanup
+
+catch: ; preds = %catch.dispatch
+ %0 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ catchret from %0 to label %try.cont
+
+try.cont: ; preds = %catch, %invoke.cont
+ ret void
+
+ehcleanup:
+ %cp2 = cleanuppad within none []
+ cleanupret from %cp2 unwind to caller
+}
+
+; This case tests simplification of an otherwise empty cleanup pad that contains
+; a PHI node.
+;
+; int f6() {
+; int state = 1;
+; try {
+; S a;
+; g();
+; state = 2;
+; g();
+; } catch (...) {
+; return state;
+; }
+; return 0;
+; }
+;
+; In this case, the cleanup pad should be eliminated and the PHI node in the
+; cleanup pad should be sunk into the catch dispatch block.
+;
+; CHECK-LABEL: define i32 @f6()
+; CHECK: entry:
+; CHECK: invoke void @g()
+; CHECK: invoke.cont:
+; CHECK: invoke void @g()
+; CHECK-NOT: ehcleanup:
+; CHECK-NOT: cleanuppad
+; CHECK: catch.dispatch:
+; CHECK: %state.0 = phi i32 [ 2, %invoke.cont ], [ 1, %entry ]
+; CHECK: }
+define i32 @f6() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @g()
+ to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ invoke void @g()
+ to label %return unwind label %ehcleanup
+
+ehcleanup: ; preds = %invoke.cont, %entry
+ %state.0 = phi i32 [ 2, %invoke.cont ], [ 1, %entry ]
+ %0 = cleanuppad within none []
+ cleanupret from %0 unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %ehcleanup
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ catchret from %1 to label %return
+
+return: ; preds = %invoke.cont, %catch
+ %retval.0 = phi i32 [ %state.0, %catch ], [ 0, %invoke.cont ]
+ ret i32 %retval.0
+}
+
+; This case tests another variation of simplification of an otherwise empty
+; cleanup pad that contains a PHI node.
+;
+; int f7() {
+; int state = 1;
+; try {
+; g();
+; state = 2;
+; S a;
+; g();
+; state = 3;
+; g();
+; } catch (...) {
+; return state;
+; }
+; return 0;
+; }
+;
+; In this case, the cleanup pad should be eliminated and the PHI node in the
+; cleanup pad should be merged with the PHI node in the catch dispatch block.
+;
+; CHECK-LABEL: define i32 @f7()
+; CHECK: entry:
+; CHECK: invoke void @g()
+; CHECK: invoke.cont:
+; CHECK: invoke void @g()
+; CHECK: invoke.cont.1:
+; CHECK: invoke void @g()
+; CHECK-NOT: ehcleanup:
+; CHECK-NOT: cleanuppad
+; CHECK: catch.dispatch:
+; CHECK: %state.1 = phi i32 [ 1, %entry ], [ 3, %invoke.cont.1 ], [ 2, %invoke.cont ]
+; CHECK: }
+define i32 @f7() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @g()
+ to label %invoke.cont unwind label %catch.dispatch
+
+invoke.cont: ; preds = %entry
+ invoke void @g()
+ to label %invoke.cont.1 unwind label %ehcleanup
+
+invoke.cont.1: ; preds = %invoke.cont
+ invoke void @g()
+ to label %return unwind label %ehcleanup
+
+ehcleanup: ; preds = %invoke.cont.1, %invoke.cont
+ %state.0 = phi i32 [ 3, %invoke.cont.1 ], [ 2, %invoke.cont ]
+ %0 = cleanuppad within none []
+ cleanupret from %0 unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %ehcleanup, %entry
+ %state.1 = phi i32 [ %state.0, %ehcleanup ], [ 1, %entry ]
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ catchret from %1 to label %return
+
+return: ; preds = %invoke.cont.1, %catch
+ %retval.0 = phi i32 [ %state.1, %catch ], [ 0, %invoke.cont.1 ]
+ ret i32 %retval.0
+}
+
+; This case tests a scenario where an empty cleanup pad is not dominated by all
+; of the predecessors of its successor, but the successor references a PHI node
+; in the empty cleanup pad.
+;
+; Conceptually, the case being modeled is something like this:
+;
+; int f8() {
+; int x = 1;
+; try {
+; S a;
+; g();
+; x = 2;
+; retry:
+; g();
+; return
+; } catch (...) {
+; use_x(x);
+; }
+; goto retry;
+; }
+;
+; While that C++ syntax isn't legal, the IR below is.
+;
+; In this case, the PHI node that is sunk from ehcleanup to catch.dispatch
+; should have an incoming value entry for the path from 'catch.cont' that
+; references the PHI node itself.
+;
+; CHECK-LABEL: define void @f8()
+; CHECK: entry:
+; CHECK: invoke void @g()
+; CHECK: invoke.cont:
+; CHECK: invoke void @g()
+; CHECK-NOT: ehcleanup:
+; CHECK-NOT: cleanuppad
+; CHECK: catch.dispatch:
+; CHECK: %x = phi i32 [ 2, %invoke.cont ], [ 1, %entry ], [ %x, %catch.cont ]
+; CHECK: }
+define void @f8() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @g()
+ to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ invoke void @g()
+ to label %return unwind label %ehcleanup
+
+ehcleanup: ; preds = %invoke.cont, %entry
+ %x = phi i32 [ 2, %invoke.cont ], [ 1, %entry ]
+ %0 = cleanuppad within none []
+ cleanupret from %0 unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %ehcleanup, %catch.cont
+ %cs1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %cs1 [i8* null, i32 u0x40, i8* null]
+ call void @use_x(i32 %x)
+ catchret from %1 to label %catch.cont
+
+catch.cont: ; preds = %catch
+ invoke void @g()
+ to label %return unwind label %catch.dispatch
+
+return: ; preds = %invoke.cont, %catch.cont
+ ret void
+}
+
+%struct.S = type { i8 }
+%struct.S2 = type { i8 }
+declare void @"\01??1S2@@QEAA@XZ"(%struct.S2*)
+declare void @g()
+declare void @use_x(i32 %x)
+
+declare i32 @__CxxFrameHandler3(...)
+
diff --git a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
index 345cf6282e3c..887373a2d3db 100644
--- a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
+++ b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
@@ -1,6 +1,6 @@
; RUN: opt -simplifycfg -S < %s | FileCheck %s
-define i32 @foo(i32 %i) nounwind ssp {
+define i32 @foo(i32 %i) nounwind ssp !dbg !0 {
call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !6, metadata !DIExpression()), !dbg !7
call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !9, metadata !DIExpression()), !dbg !11
%1 = icmp ne i32 %i, 0, !dbg !12
@@ -32,16 +32,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!21}
!llvm.dbg.sp = !{!0}
-!0 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !1, type: !3, function: i32 (i32)* @foo)
+!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !1, type: !3)
!1 = !DIFile(filename: "b.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !20, enums: !8, retainedTypes: !8)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !20, enums: !8, retainedTypes: !8)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 2, arg: 1, scope: !0, file: !1, type: !5)
+!6 = !DILocalVariable(name: "i", line: 2, arg: 1, scope: !0, file: !1, type: !5)
!7 = !DILocation(line: 2, column: 13, scope: !0)
!8 = !{i32 0}
-!9 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 3, scope: !10, file: !1, type: !5)
+!9 = !DILocalVariable(name: "k", line: 3, scope: !10, file: !1, type: !5)
!10 = distinct !DILexicalBlock(line: 2, column: 16, file: !20, scope: !0)
!11 = !DILocation(line: 3, column: 12, scope: !10)
!12 = !DILocation(line: 4, column: 3, scope: !10)
diff --git a/test/Transforms/SimplifyCFG/implied-cond.ll b/test/Transforms/SimplifyCFG/implied-cond.ll
new file mode 100644
index 000000000000..317adc4c3472
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/implied-cond.ll
@@ -0,0 +1,81 @@
+; RUN: opt %s -S -simplifycfg | FileCheck %s
+; Check for when one branch implies the value of a successor's conditional and
+; it's not simply the same conditional repeated.
+
+define void @test(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test
+ %iplus1 = add nsw i32 %i, 1
+ %var29 = icmp slt i32 %iplus1, %length.i
+; CHECK: br i1 %var29, label %in_bounds, label %out_of_bounds
+ br i1 %var29, label %next, label %out_of_bounds
+
+next:
+; CHECK-LABEL: in_bounds:
+; CHECK-NEXT: ret void
+ %var30 = icmp slt i32 %i, %length.i
+ br i1 %var30, label %in_bounds, label %out_of_bounds2
+
+in_bounds:
+ ret void
+
+out_of_bounds:
+ call void @foo(i64 0)
+ unreachable
+
+out_of_bounds2:
+ call void @foo(i64 1)
+ unreachable
+}
+
+; If the add is not nsw, it's not safe to use the fact about i+1 to imply the
+; i condition since it could have overflowed.
+define void @test_neg(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test_neg
+ %iplus1 = add i32 %i, 1
+ %var29 = icmp slt i32 %iplus1, %length.i
+; CHECK: br i1 %var29, label %next, label %out_of_bounds
+ br i1 %var29, label %next, label %out_of_bounds
+
+next:
+ %var30 = icmp slt i32 %i, %length.i
+; CHECK: br i1 %var30, label %in_bounds, label %out_of_bounds2
+ br i1 %var30, label %in_bounds, label %out_of_bounds2
+
+in_bounds:
+ ret void
+
+out_of_bounds:
+ call void @foo(i64 0)
+ unreachable
+
+out_of_bounds2:
+ call void @foo(i64 1)
+ unreachable
+}
+
+
+define void @test2(i32 %length.i, i32 %i) {
+; CHECK-LABEL: @test2
+ %iplus100 = add nsw i32 %i, 100
+ %var29 = icmp slt i32 %iplus100, %length.i
+; CHECK: br i1 %var29, label %in_bounds, label %out_of_bounds
+ br i1 %var29, label %next, label %out_of_bounds
+
+next:
+ %var30 = icmp slt i32 %i, %length.i
+ br i1 %var30, label %in_bounds, label %out_of_bounds2
+
+in_bounds:
+ ret void
+
+out_of_bounds:
+ call void @foo(i64 0)
+ unreachable
+
+out_of_bounds2:
+ call void @foo(i64 1)
+ unreachable
+}
+
+declare void @foo(i64)
+
diff --git a/test/Transforms/SimplifyCFG/invoke_unwind.ll b/test/Transforms/SimplifyCFG/invoke_unwind.ll
index 3b4c09d96f77..100bfd4e9e3e 100644
--- a/test/Transforms/SimplifyCFG/invoke_unwind.ll
+++ b/test/Transforms/SimplifyCFG/invoke_unwind.ll
@@ -17,4 +17,17 @@ Rethrow:
resume { i8*, i32 } %exn
}
+define i32 @test2() personality i32 (...)* @__gxx_personality_v0 {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: call void @bar() [ "foo"(i32 100) ]
+; CHECK-NEXT: ret i32 0
+ invoke void @bar( ) [ "foo"(i32 100) ]
+ to label %1 unwind label %Rethrow
+ ret i32 0
+Rethrow:
+ %exn = landingpad {i8*, i32}
+ catch i8* null
+ resume { i8*, i32 } %exn
+}
+
declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll b/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll
new file mode 100644
index 000000000000..fe498b5334e8
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll
@@ -0,0 +1,215 @@
+; RUN: opt -S < %s -simplifycfg -simplifycfg-merge-cond-stores=true -simplifycfg-merge-cond-stores-aggressively=false -phi-node-folding-threshold=2 | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+; This is a bit reversal that has been run through the early optimizer (-mem2reg -gvn -instcombine).
+; There should be no additional PHIs created at all. The store should be on its own in a predicated
+; block and there should be no PHIs.
+
+; CHECK-LABEL: @f
+; Exactly 16 selects are expected; the original test case has 15 phis and 16 conditional stores.
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK: select
+; CHECK-NOT: select
+; CHECK: br i1 {{.*}}, label %[[L:.*]], label %[[R:.*]]
+; CHECK: [[L]] ; preds =
+; CHECK-NEXT: store
+; CHECK-NEXT: br label %[[R]]
+; CHECK: [[R]] ; preds =
+; CHECK-NEXT: ret i32 0
+
+define i32 @f(i32* %b) {
+entry:
+ %0 = load i32, i32* %b, align 4
+ %and = and i32 %0, 1
+ %tobool = icmp eq i32 %and, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %or = or i32 %0, -2147483648
+ store i32 %or, i32* %b, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %1 = phi i32 [ %0, %entry ], [ %or, %if.then ]
+ %and1 = and i32 %1, 2
+ %tobool2 = icmp eq i32 %and1, 0
+ br i1 %tobool2, label %if.end5, label %if.then3
+
+if.then3: ; preds = %if.end
+ %or4 = or i32 %1, 1073741824
+ store i32 %or4, i32* %b, align 4
+ br label %if.end5
+
+if.end5: ; preds = %if.end, %if.then3
+ %2 = phi i32 [ %1, %if.end ], [ %or4, %if.then3 ]
+ %and6 = and i32 %2, 4
+ %tobool7 = icmp eq i32 %and6, 0
+ br i1 %tobool7, label %if.end10, label %if.then8
+
+if.then8: ; preds = %if.end5
+ %or9 = or i32 %2, 536870912
+ store i32 %or9, i32* %b, align 4
+ br label %if.end10
+
+if.end10: ; preds = %if.end5, %if.then8
+ %3 = phi i32 [ %2, %if.end5 ], [ %or9, %if.then8 ]
+ %and11 = and i32 %3, 8
+ %tobool12 = icmp eq i32 %and11, 0
+ br i1 %tobool12, label %if.end15, label %if.then13
+
+if.then13: ; preds = %if.end10
+ %or14 = or i32 %3, 268435456
+ store i32 %or14, i32* %b, align 4
+ br label %if.end15
+
+if.end15: ; preds = %if.end10, %if.then13
+ %4 = phi i32 [ %3, %if.end10 ], [ %or14, %if.then13 ]
+ %and16 = and i32 %4, 16
+ %tobool17 = icmp eq i32 %and16, 0
+ br i1 %tobool17, label %if.end20, label %if.then18
+
+if.then18: ; preds = %if.end15
+ %or19 = or i32 %4, 134217728
+ store i32 %or19, i32* %b, align 4
+ br label %if.end20
+
+if.end20: ; preds = %if.end15, %if.then18
+ %5 = phi i32 [ %4, %if.end15 ], [ %or19, %if.then18 ]
+ %and21 = and i32 %5, 32
+ %tobool22 = icmp eq i32 %and21, 0
+ br i1 %tobool22, label %if.end25, label %if.then23
+
+if.then23: ; preds = %if.end20
+ %or24 = or i32 %5, 67108864
+ store i32 %or24, i32* %b, align 4
+ br label %if.end25
+
+if.end25: ; preds = %if.end20, %if.then23
+ %6 = phi i32 [ %5, %if.end20 ], [ %or24, %if.then23 ]
+ %and26 = and i32 %6, 64
+ %tobool27 = icmp eq i32 %and26, 0
+ br i1 %tobool27, label %if.end30, label %if.then28
+
+if.then28: ; preds = %if.end25
+ %or29 = or i32 %6, 33554432
+ store i32 %or29, i32* %b, align 4
+ br label %if.end30
+
+if.end30: ; preds = %if.end25, %if.then28
+ %7 = phi i32 [ %6, %if.end25 ], [ %or29, %if.then28 ]
+ %and31 = and i32 %7, 256
+ %tobool32 = icmp eq i32 %and31, 0
+ br i1 %tobool32, label %if.end35, label %if.then33
+
+if.then33: ; preds = %if.end30
+ %or34 = or i32 %7, 8388608
+ store i32 %or34, i32* %b, align 4
+ br label %if.end35
+
+if.end35: ; preds = %if.end30, %if.then33
+ %8 = phi i32 [ %7, %if.end30 ], [ %or34, %if.then33 ]
+ %and36 = and i32 %8, 512
+ %tobool37 = icmp eq i32 %and36, 0
+ br i1 %tobool37, label %if.end40, label %if.then38
+
+if.then38: ; preds = %if.end35
+ %or39 = or i32 %8, 4194304
+ store i32 %or39, i32* %b, align 4
+ br label %if.end40
+
+if.end40: ; preds = %if.end35, %if.then38
+ %9 = phi i32 [ %8, %if.end35 ], [ %or39, %if.then38 ]
+ %and41 = and i32 %9, 1024
+ %tobool42 = icmp eq i32 %and41, 0
+ br i1 %tobool42, label %if.end45, label %if.then43
+
+if.then43: ; preds = %if.end40
+ %or44 = or i32 %9, 2097152
+ store i32 %or44, i32* %b, align 4
+ br label %if.end45
+
+if.end45: ; preds = %if.end40, %if.then43
+ %10 = phi i32 [ %9, %if.end40 ], [ %or44, %if.then43 ]
+ %and46 = and i32 %10, 2048
+ %tobool47 = icmp eq i32 %and46, 0
+ br i1 %tobool47, label %if.end50, label %if.then48
+
+if.then48: ; preds = %if.end45
+ %or49 = or i32 %10, 1048576
+ store i32 %or49, i32* %b, align 4
+ br label %if.end50
+
+if.end50: ; preds = %if.end45, %if.then48
+ %11 = phi i32 [ %10, %if.end45 ], [ %or49, %if.then48 ]
+ %and51 = and i32 %11, 4096
+ %tobool52 = icmp eq i32 %and51, 0
+ br i1 %tobool52, label %if.end55, label %if.then53
+
+if.then53: ; preds = %if.end50
+ %or54 = or i32 %11, 524288
+ store i32 %or54, i32* %b, align 4
+ br label %if.end55
+
+if.end55: ; preds = %if.end50, %if.then53
+ %12 = phi i32 [ %11, %if.end50 ], [ %or54, %if.then53 ]
+ %and56 = and i32 %12, 8192
+ %tobool57 = icmp eq i32 %and56, 0
+ br i1 %tobool57, label %if.end60, label %if.then58
+
+if.then58: ; preds = %if.end55
+ %or59 = or i32 %12, 262144
+ store i32 %or59, i32* %b, align 4
+ br label %if.end60
+
+if.end60: ; preds = %if.end55, %if.then58
+ %13 = phi i32 [ %12, %if.end55 ], [ %or59, %if.then58 ]
+ %and61 = and i32 %13, 16384
+ %tobool62 = icmp eq i32 %and61, 0
+ br i1 %tobool62, label %if.end65, label %if.then63
+
+if.then63: ; preds = %if.end60
+ %or64 = or i32 %13, 131072
+ store i32 %or64, i32* %b, align 4
+ br label %if.end65
+
+if.end65: ; preds = %if.end60, %if.then63
+ %14 = phi i32 [ %13, %if.end60 ], [ %or64, %if.then63 ]
+ %and66 = and i32 %14, 32768
+ %tobool67 = icmp eq i32 %and66, 0
+ br i1 %tobool67, label %if.end70, label %if.then68
+
+if.then68: ; preds = %if.end65
+ %or69 = or i32 %14, 65536
+ store i32 %or69, i32* %b, align 4
+ br label %if.end70
+
+if.end70: ; preds = %if.end65, %if.then68
+ %15 = phi i32 [ %14, %if.end65 ], [ %or69, %if.then68 ]
+ %and71 = and i32 %15, 128
+ %tobool72 = icmp eq i32 %and71, 0
+ br i1 %tobool72, label %if.end75, label %if.then73
+
+if.then73: ; preds = %if.end70
+ %or74 = or i32 %15, 16777216
+ store i32 %or74, i32* %b, align 4
+ br label %if.end75
+
+if.end75: ; preds = %if.end70, %if.then73
+ ret i32 0
+}
diff --git a/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/test/Transforms/SimplifyCFG/merge-cond-stores.ll
new file mode 100644
index 000000000000..77e3158d9bbd
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/merge-cond-stores.ll
@@ -0,0 +1,241 @@
+; RUN: opt -simplifycfg -instcombine < %s -simplifycfg-merge-cond-stores=true -simplifycfg-merge-cond-stores-aggressively=false -phi-node-folding-threshold=2 -S | FileCheck %s
+
+; CHECK-LABEL: @test_simple
+; This test should succeed and end up if-converted.
+; CHECK: icmp eq i32 %b, 0
+; CHECK-NEXT: icmp ne i32 %a, 0
+; CHECK-NEXT: xor i1 %x2, true
+; CHECK-NEXT: %[[x:.*]] = or i1 %{{.*}}, %{{.*}}
+; CHECK-NEXT: br i1 %[[x]]
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret
+define void @test_simple(i32* %p, i32 %a, i32 %b) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+ store i32 0, i32* %p
+ br label %fallthrough
+
+fallthrough:
+ %x2 = icmp eq i32 %b, 0
+ br i1 %x2, label %end, label %yes2
+
+yes2:
+ store i32 1, i32* %p
+ br label %end
+
+end:
+ ret void
+}
+
+; CHECK-LABEL: @test_recursive
+; This test should entirely fold away, leaving one large basic block.
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret
+define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+ store i32 0, i32* %p
+ br label %fallthrough
+
+fallthrough:
+ %x2 = icmp eq i32 %b, 0
+ br i1 %x2, label %next, label %yes2
+
+yes2:
+ store i32 1, i32* %p
+ br label %next
+
+next:
+ %x3 = icmp eq i32 %c, 0
+ br i1 %x3, label %fallthrough2, label %yes3
+
+yes3:
+ store i32 2, i32* %p
+ br label %fallthrough2
+
+fallthrough2:
+ %x4 = icmp eq i32 %d, 0
+ br i1 %x4, label %end, label %yes4
+
+yes4:
+ store i32 3, i32* %p
+ br label %end
+
+
+end:
+ ret void
+}
+
+; CHECK-LABEL: @test_not_ifconverted
+; The code in each diamond is too large - it won't be if-converted so our
+; heuristics should say no.
+; CHECK: store
+; CHECK: store
+; CHECK: ret
+define void @test_not_ifconverted(i32* %p, i32 %a, i32 %b) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+ %y1 = or i32 %b, 55
+ %y2 = add i32 %y1, 24
+ %y3 = and i32 %y2, 67
+ store i32 %y3, i32* %p
+ br label %fallthrough
+
+fallthrough:
+ %x2 = icmp eq i32 %b, 0
+ br i1 %x2, label %end, label %yes2
+
+yes2:
+ %z1 = or i32 %a, 55
+ %z2 = add i32 %z1, 24
+ %z3 = and i32 %z2, 67
+ store i32 %z3, i32* %p
+ br label %end
+
+end:
+ ret void
+}
+
+; CHECK-LABEL: @test_aliasing1
+; The store to %p clobbers the previous store, so if-converting this would
+; be illegal.
+; CHECK: store
+; CHECK: store
+; CHECK: ret
+define void @test_aliasing1(i32* %p, i32 %a, i32 %b) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+ store i32 0, i32* %p
+ br label %fallthrough
+
+fallthrough:
+ %y1 = load i32, i32* %p
+ %x2 = icmp eq i32 %y1, 0
+ br i1 %x2, label %end, label %yes2
+
+yes2:
+ store i32 1, i32* %p
+ br label %end
+
+end:
+ ret void
+}
+
+; CHECK-LABEL: @test_aliasing2
+; The load from %q aliases with %p, so if-converting this would be illegal.
+; CHECK: store
+; CHECK: store
+; CHECK: ret
+define void @test_aliasing2(i32* %p, i32* %q, i32 %a, i32 %b) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %fallthrough, label %yes1
+
+yes1:
+ store i32 0, i32* %p
+ br label %fallthrough
+
+fallthrough:
+ %y1 = load i32, i32* %q
+ %x2 = icmp eq i32 %y1, 0
+ br i1 %x2, label %end, label %yes2
+
+yes2:
+ store i32 1, i32* %p
+ br label %end
+
+end:
+ ret void
+}
+
+declare void @f()
+
+; CHECK-LABEL: @test_diamond_simple
+; This should get if-converted.
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret
+define i32 @test_diamond_simple(i32* %p, i32* %q, i32 %a, i32 %b) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %no1, label %yes1
+
+yes1:
+ store i32 0, i32* %p
+ br label %fallthrough
+
+no1:
+ %z1 = add i32 %a, %b
+ br label %fallthrough
+
+fallthrough:
+ %z2 = phi i32 [ %z1, %no1 ], [ 0, %yes1 ]
+ %x2 = icmp eq i32 %b, 0
+ br i1 %x2, label %no2, label %yes2
+
+yes2:
+ store i32 1, i32* %p
+ br label %end
+
+no2:
+ %z3 = sub i32 %z2, %b
+ br label %end
+
+end:
+ %z4 = phi i32 [ %z3, %no2 ], [ 3, %yes2 ]
+ ret i32 %z4
+}
+
+; CHECK-LABEL: @test_diamond_alias3
+; Now there is a call to f() in the bottom branch. The store in the first
+; branch would now be reordered with respect to the call if we if-converted,
+; so we must not.
+; CHECK: store
+; CHECK: store
+; CHECK: ret
+define i32 @test_diamond_alias3(i32* %p, i32* %q, i32 %a, i32 %b) {
+entry:
+ %x1 = icmp eq i32 %a, 0
+ br i1 %x1, label %no1, label %yes1
+
+yes1:
+ store i32 0, i32* %p
+ br label %fallthrough
+
+no1:
+ call void @f()
+ %z1 = add i32 %a, %b
+ br label %fallthrough
+
+fallthrough:
+ %z2 = phi i32 [ %z1, %no1 ], [ 0, %yes1 ]
+ %x2 = icmp eq i32 %b, 0
+ br i1 %x2, label %no2, label %yes2
+
+yes2:
+ store i32 1, i32* %p
+ br label %end
+
+no2:
+ call void @f()
+ %z3 = sub i32 %z2, %b
+ br label %end
+
+end:
+ %z4 = phi i32 [ %z3, %no2 ], [ 3, %yes2 ]
+ ret i32 %z4
+}
diff --git a/test/Transforms/SimplifyCFG/no_speculative_loads_with_asan.ll b/test/Transforms/SimplifyCFG/no_speculative_loads_with_asan.ll
new file mode 100644
index 000000000000..063bde83f7b3
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/no_speculative_loads_with_asan.ll
@@ -0,0 +1,40 @@
+; RUN: opt -simplifycfg -S %s | FileCheck %s
+; Make sure we don't speculate loads under AddressSanitizer.
+@g = global i32 0, align 4
+
+define i32 @TestNoAsan(i32 %cond) nounwind readonly uwtable {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i32, i32* @g, align 4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval = phi i32 [ %0, %if.then ], [ 0, %entry ]
+ ret i32 %retval
+; CHECK-LABEL: @TestNoAsan
+; CHECK: %[[LOAD:[^ ]*]] = load
+; CHECK: select{{.*}}[[LOAD]]
+; CHECK: ret i32
+}
+
+define i32 @TestAsan(i32 %cond) nounwind readonly uwtable sanitize_address {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i32, i32* @g, align 4
+ br label %return
+
+return: ; preds = %entry, %if.then
+ %retval = phi i32 [ %0, %if.then ], [ 0, %entry ]
+ ret i32 %retval
+; CHECK-LABEL: @TestAsan
+; CHECK: br i1
+; CHECK: load i32, i32* @g
+; CHECK: br label
+; CHECK: ret i32
+}
diff --git a/test/Transforms/SimplifyCFG/preserve-load-metadata-2.ll b/test/Transforms/SimplifyCFG/preserve-load-metadata-2.ll
new file mode 100644
index 000000000000..94d3565ce985
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-load-metadata-2.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+declare void @bar(i32*)
+declare void @baz(i32*)
+
+; CHECK-LABEL: @test_load_combine_metadata(
+; Check that dereferenceable metadata is combined
+; CHECK: load i32*, i32** %p
+; CHECK-SAME: !dereferenceable ![[DEREF:[0-9]+]]
+; CHECK: t:
+; CHECK: f:
+define void @test_load_combine_metadata(i1 %c, i32** %p) {
+ br i1 %c, label %t, label %f
+
+t:
+ %v1 = load i32*, i32** %p, !dereferenceable !0
+ call void @bar(i32* %v1)
+ br label %cont
+
+f:
+ %v2 = load i32*, i32** %p, !dereferenceable !1
+ call void @baz(i32* %v2)
+ br label %cont
+
+cont:
+ ret void
+}
+
+; CHECK: ![[DEREF]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/test/Transforms/SimplifyCFG/preserve-load-metadata-3.ll b/test/Transforms/SimplifyCFG/preserve-load-metadata-3.ll
new file mode 100644
index 000000000000..92bdf6ec5c1a
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-load-metadata-3.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+declare void @bar(i32*)
+declare void @baz(i32*)
+
+; CHECK-LABEL: @test_load_combine_metadata(
+; Check that dereferenceable_or_null metadata is combined
+; CHECK: load i32*, i32** %p
+; CHECK-SAME: !dereferenceable_or_null ![[DEREF:[0-9]+]]
+; CHECK: t:
+; CHECK: f:
+define void @test_load_combine_metadata(i1 %c, i32** %p) {
+ br i1 %c, label %t, label %f
+
+t:
+ %v1 = load i32*, i32** %p, !dereferenceable_or_null !0
+ call void @bar(i32* %v1)
+ br label %cont
+
+f:
+ %v2 = load i32*, i32** %p, !dereferenceable_or_null !1
+ call void @baz(i32* %v2)
+ br label %cont
+
+cont:
+ ret void
+}
+
+; CHECK: ![[DEREF]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/test/Transforms/SimplifyCFG/preserve-load-metadata.ll b/test/Transforms/SimplifyCFG/preserve-load-metadata.ll
new file mode 100644
index 000000000000..89815c843152
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-load-metadata.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+declare void @bar(i32*)
+declare void @baz(i32*)
+
+; CHECK-LABEL: @test_load_combine_metadata(
+; Check that align metadata is combined
+; CHECK: load i32*, i32** %p
+; CHECK-SAME: !align ![[ALIGN:[0-9]+]]
+; CHECK: t:
+; CHECK: f:
+define void @test_load_combine_metadata(i1 %c, i32** %p) {
+ br i1 %c, label %t, label %f
+
+t:
+ %v1 = load i32*, i32** %p, !align !0
+ call void @bar(i32* %v1)
+ br label %cont
+
+f:
+ %v2 = load i32*, i32** %p, !align !1
+ call void @baz(i32* %v2)
+ br label %cont
+
+cont:
+ ret void
+}
+
+; CHECK: ![[ALIGN]] = !{i64 8}
+
+!0 = !{i64 8}
+!1 = !{i64 16}
diff --git a/test/Transforms/SimplifyCFG/preserve-make-implicit-on-switch-to-br.ll b/test/Transforms/SimplifyCFG/preserve-make-implicit-on-switch-to-br.ll
new file mode 100644
index 000000000000..0e95336bbc1f
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-make-implicit-on-switch-to-br.ll
@@ -0,0 +1,30 @@
+; RUN: opt %s -simplifycfg -S | FileCheck %s
+
+; ConstantFoldTerminator function can convert SwitchInst with one case (and default) to
+; a conditional BranchInst. This test checks that the converted BranchInst preserves the
+; make.implicit metadata.
+
+declare i32 @consume(i32*)
+declare void @trap()
+
+define i32 @copy-metadata(i32* %x) {
+
+entry:
+ %x.int = ptrtoint i32* %x to i64
+
+; CHECK: br i1 %cond, label %is_null, label %default, !make.implicit !0
+ switch i64 %x.int, label %default [
+ i64 0, label %is_null
+ ], !make.implicit !0
+
+default:
+ %0 = call i32 @consume(i32* %x)
+ ret i32 %0
+
+is_null:
+ call void @trap()
+ unreachable
+}
+
+!0 = !{}
+
diff --git a/test/Transforms/SimplifyCFG/speculate-math.ll b/test/Transforms/SimplifyCFG/speculate-math.ll
index 0ba93d29117a..5655d5d78821 100644
--- a/test/Transforms/SimplifyCFG/speculate-math.ll
+++ b/test/Transforms/SimplifyCFG/speculate-math.ll
@@ -1,4 +1,5 @@
-; RUN: opt -S -simplifycfg -phi-node-folding-threshold=2 < %s | FileCheck %s
+; RUN: opt -S -simplifycfg < %s | FileCheck %s --check-prefix=EXPENSIVE --check-prefix=ALL
+; RUN: opt -S -simplifycfg -speculate-one-expensive-inst=false < %s | FileCheck %s --check-prefix=CHEAP --check-prefix=ALL
declare float @llvm.sqrt.f32(float) nounwind readonly
declare float @llvm.fma.f32(float, float, float) nounwind readonly
@@ -7,8 +8,26 @@ declare float @llvm.fabs.f32(float) nounwind readonly
declare float @llvm.minnum.f32(float, float) nounwind readonly
declare float @llvm.maxnum.f32(float, float) nounwind readonly
-; CHECK-LABEL: @sqrt_test(
-; CHECK: select
+; ALL-LABEL: @fdiv_test(
+; EXPENSIVE: select i1 %cmp, double %div, double 0.0
+; CHEAP-NOT: select
+
+define double @fdiv_test(double %a, double %b) {
+entry:
+ %cmp = fcmp ogt double %a, 0.0
+ br i1 %cmp, label %cond.true, label %cond.end
+
+cond.true:
+ %div = fdiv double %b, %a
+ br label %cond.end
+
+cond.end:
+ %cond = phi double [ %div, %cond.true ], [ 0.0, %entry ]
+ ret double %cond
+}
+
+; ALL-LABEL: @sqrt_test(
+; ALL: select
define void @sqrt_test(float addrspace(1)* noalias nocapture %out, float %a) nounwind {
entry:
%cmp.i = fcmp olt float %a, 0.000000e+00
@@ -24,8 +43,8 @@ test_sqrt.exit: ; preds = %cond.else.i, %entry
ret void
}
-; CHECK-LABEL: @fabs_test(
-; CHECK: select
+; ALL-LABEL: @fabs_test(
+; ALL: select
define void @fabs_test(float addrspace(1)* noalias nocapture %out, float %a) nounwind {
entry:
%cmp.i = fcmp olt float %a, 0.000000e+00
@@ -41,8 +60,8 @@ test_fabs.exit: ; preds = %cond.else.i, %entry
ret void
}
-; CHECK-LABEL: @fma_test(
-; CHECK: select
+; ALL-LABEL: @fma_test(
+; ALL: select
define void @fma_test(float addrspace(1)* noalias nocapture %out, float %a, float %b, float %c) nounwind {
entry:
%cmp.i = fcmp olt float %a, 0.000000e+00
@@ -58,8 +77,8 @@ test_fma.exit: ; preds = %cond.else.i, %entry
ret void
}
-; CHECK-LABEL: @fmuladd_test(
-; CHECK: select
+; ALL-LABEL: @fmuladd_test(
+; ALL: select
define void @fmuladd_test(float addrspace(1)* noalias nocapture %out, float %a, float %b, float %c) nounwind {
entry:
%cmp.i = fcmp olt float %a, 0.000000e+00
@@ -75,8 +94,8 @@ test_fmuladd.exit: ; preds = %cond.else.i, %en
ret void
}
-; CHECK-LABEL: @minnum_test(
-; CHECK: select
+; ALL-LABEL: @minnum_test(
+; ALL: select
define void @minnum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
entry:
%cmp.i = fcmp olt float %a, 0.000000e+00
@@ -92,8 +111,8 @@ test_minnum.exit: ; preds = %cond.else.i, %ent
ret void
}
-; CHECK-LABEL: @maxnum_test(
-; CHECK: select
+; ALL-LABEL: @maxnum_test(
+; ALL: select
define void @maxnum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
entry:
%cmp.i = fcmp olt float %a, 0.000000e+00
diff --git a/test/Transforms/SimplifyCFG/statepoint-invoke-unwind.ll b/test/Transforms/SimplifyCFG/statepoint-invoke-unwind.ll
index 994e47eb0d64..53daa8292da7 100644
--- a/test/Transforms/SimplifyCFG/statepoint-invoke-unwind.ll
+++ b/test/Transforms/SimplifyCFG/statepoint-invoke-unwind.ll
@@ -3,15 +3,15 @@
; not optimized into call
declare i64 addrspace(1)* @gc_call()
-declare i32 @llvm.experimental.gc.statepoint.p0f_p1i64f(i64, i32, i64 addrspace(1)* ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_p1i64f(i64, i32, i64 addrspace(1)* ()*, i32, i32, ...)
declare i32* @fake_personality_function()
define i32 @test() gc "statepoint-example" personality i32* ()* @fake_personality_function {
; CHECK-LABEL: test
entry:
; CHECK-LABEL: entry:
- ; CHECK-NEXT: %sp = invoke i32 (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f
- %sp = invoke i32 (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @gc_call, i32 0, i32 0, i32 0, i32 0)
+ ; CHECK-NEXT: %sp = invoke token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f
+ %sp = invoke token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @gc_call, i32 0, i32 0, i32 0, i32 0)
to label %normal unwind label %exception
exception:
diff --git a/test/Transforms/SimplifyCFG/switch-dead-default.ll b/test/Transforms/SimplifyCFG/switch-dead-default.ll
new file mode 100644
index 000000000000..e5c2ef65b318
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/switch-dead-default.ll
@@ -0,0 +1,179 @@
+; RUN: opt %s -S -simplifycfg | FileCheck %s
+declare void @foo(i32)
+
+define void @test(i1 %a) {
+; CHECK-LABEL: @test
+; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
+ switch i1 %a, label %default [i1 1, label %true
+ i1 0, label %false]
+true:
+ call void @foo(i32 1)
+ ret void
+false:
+ call void @foo(i32 3)
+ ret void
+default:
+ call void @foo(i32 2)
+ ret void
+}
+
+define void @test2(i2 %a) {
+; CHECK-LABEL: @test2
+ switch i2 %a, label %default [i2 0, label %case0
+ i2 1, label %case1
+ i2 2, label %case2
+ i2 3, label %case3]
+case0:
+ call void @foo(i32 0)
+ ret void
+case1:
+ call void @foo(i32 1)
+ ret void
+case2:
+ call void @foo(i32 2)
+ ret void
+case3:
+ call void @foo(i32 3)
+ ret void
+default:
+; CHECK-LABEL: default1:
+; CHECK-NEXT: unreachable
+ call void @foo(i32 4)
+ ret void
+}
+
+; This one is a negative test - we know the value of the default,
+; but that's about it
+define void @test3(i2 %a) {
+; CHECK-LABEL: @test3
+ switch i2 %a, label %default [i2 0, label %case0
+ i2 1, label %case1
+ i2 2, label %case2]
+
+case0:
+ call void @foo(i32 0)
+ ret void
+case1:
+ call void @foo(i32 1)
+ ret void
+case2:
+ call void @foo(i32 2)
+ ret void
+default:
+; CHECK-LABEL: default:
+; CHECK-NEXT: call void @foo
+ call void @foo(i32 0)
+ ret void
+}
+
+; Negative test - check for possible overflow when computing
+; number of possible cases.
+define void @test4(i128 %a) {
+; CHECK-LABEL: @test4
+ switch i128 %a, label %default [i128 0, label %case0
+ i128 1, label %case1]
+
+case0:
+ call void @foo(i32 0)
+ ret void
+case1:
+ call void @foo(i32 1)
+ ret void
+default:
+; CHECK-LABEL: default:
+; CHECK-NEXT: call void @foo
+ call void @foo(i32 0)
+ ret void
+}
+
+; All but one bit known zero
+define void @test5(i8 %a) {
+; CHECK-LABEL: @test5
+; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
+ %cmp = icmp ult i8 %a, 2
+ call void @llvm.assume(i1 %cmp)
+ switch i8 %a, label %default [i8 1, label %true
+ i8 0, label %false]
+true:
+ call void @foo(i32 1)
+ ret void
+false:
+ call void @foo(i32 3)
+ ret void
+default:
+ call void @foo(i32 2)
+ ret void
+}
+
+;; All but one bit known one
+define void @test6(i8 %a) {
+; CHECK-LABEL: @test6
+; CHECK: @llvm.assume
+; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
+ %and = and i8 %a, 254
+ %cmp = icmp eq i8 %and, 254
+ call void @llvm.assume(i1 %cmp)
+ switch i8 %a, label %default [i8 255, label %true
+ i8 254, label %false]
+true:
+ call void @foo(i32 1)
+ ret void
+false:
+ call void @foo(i32 3)
+ ret void
+default:
+ call void @foo(i32 2)
+ ret void
+}
+
+; Check that we can eliminate both dead cases and dead defaults
+; within a single run of simplify-cfg
+define void @test7(i8 %a) {
+; CHECK-LABEL: @test7
+; CHECK: @llvm.assume
+; CHECK: br i1 [[IGNORE:%.*]], label %true, label %false
+ %and = and i8 %a, 254
+ %cmp = icmp eq i8 %and, 254
+ call void @llvm.assume(i1 %cmp)
+ switch i8 %a, label %default [i8 255, label %true
+ i8 254, label %false
+ i8 0, label %also_dead]
+true:
+ call void @foo(i32 1)
+ ret void
+false:
+ call void @foo(i32 3)
+ ret void
+also_dead:
+ call void @foo(i32 5)
+ ret void
+default:
+ call void @foo(i32 2)
+ ret void
+}
+
+;; All but one bit known undef
+;; Note: This is currently testing an optimization which doesn't trigger. The
+;; case this is protecting against is that a bit could be assumed both zero
+;; *or* one given we know it's undef. ValueTracking doesn't do this today,
+;; but it doesn't hurt to confirm.
+define void @test8(i8 %a) {
+; CHECK-LABEL: @test8(
+; CHECK: switch i8
+ %and = and i8 %a, 254
+ %cmp = icmp eq i8 %and, undef
+ call void @llvm.assume(i1 %cmp)
+ switch i8 %a, label %default [i8 255, label %true
+ i8 254, label %false]
+true:
+ call void @foo(i32 1)
+ ret void
+false:
+ call void @foo(i32 3)
+ ret void
+default:
+ call void @foo(i32 2)
+ ret void
+}
+
+declare void @llvm.assume(i1)
diff --git a/test/Transforms/SimplifyCFG/trap-debugloc.ll b/test/Transforms/SimplifyCFG/trap-debugloc.ll
index fedf6b172d75..2887aaf52eee 100644
--- a/test/Transforms/SimplifyCFG/trap-debugloc.ll
+++ b/test/Transforms/SimplifyCFG/trap-debugloc.ll
@@ -1,7 +1,7 @@
; RUN: opt -S -simplifycfg < %s | FileCheck %s
; Radar 9342286
; Assign DebugLoc to trap instruction.
-define void @foo() nounwind ssp {
+define void @foo() nounwind ssp !dbg !0 {
; CHECK: call void @llvm.trap(), !dbg
store i32 42, i32* null, !dbg !5
ret void, !dbg !7
@@ -11,9 +11,9 @@ define void @foo() nounwind ssp {
!llvm.module.flags = !{!10}
!llvm.dbg.sp = !{!0}
-!0 = !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !1, type: !3, function: void ()* @foo)
+!0 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !1, type: !3)
!1 = !DIFile(filename: "foo.c", directory: "/private/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 0, file: !8, enums: !{}, retainedTypes: !{}, subprograms: !9)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 0, file: !8, enums: !{}, retainedTypes: !{}, subprograms: !9)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !DILocation(line: 4, column: 2, scope: !6)
diff --git a/test/Transforms/SimplifyCFG/wineh-unreachable.ll b/test/Transforms/SimplifyCFG/wineh-unreachable.ll
new file mode 100644
index 000000000000..670119467dae
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/wineh-unreachable.ll
@@ -0,0 +1,83 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+
+declare void @Personality()
+declare void @f()
+
+; CHECK-LABEL: define void @test1()
+define void @test1() personality i8* bitcast (void ()* @Personality to i8*) {
+entry:
+ ; CHECK: call void @f()
+ invoke void @f()
+ to label %exit unwind label %unreachable.unwind
+exit:
+ ret void
+unreachable.unwind:
+ cleanuppad within none []
+ unreachable
+}
+
+; CHECK-LABEL: define void @test2()
+define void @test2() personality i8* bitcast (void ()* @Personality to i8*) {
+entry:
+ invoke void @f()
+ to label %exit unwind label %catch.pad
+catch.pad:
+ %cs1 = catchswitch within none [label %catch.body] unwind label %unreachable.unwind
+ ; CHECK: catch.pad:
+ ; CHECK-NEXT: catchswitch within none [label %catch.body] unwind to caller
+catch.body:
+ ; CHECK: catch.body:
+ ; CHECK-NEXT: catchpad within %cs1
+ ; CHECK-NEXT: call void @f()
+ ; CHECK-NEXT: unreachable
+ %catch = catchpad within %cs1 []
+ call void @f()
+ catchret from %catch to label %unreachable
+exit:
+ ret void
+unreachable.unwind:
+ cleanuppad within none []
+ unreachable
+unreachable:
+ unreachable
+}
+
+; CHECK-LABEL: define void @test3()
+define void @test3() personality i8* bitcast (void ()* @Personality to i8*) {
+entry:
+ invoke void @f()
+ to label %exit unwind label %cleanup.pad
+cleanup.pad:
+ ; CHECK: %cleanup = cleanuppad within none []
+ ; CHECK-NEXT: call void @f()
+ ; CHECK-NEXT: unreachable
+ %cleanup = cleanuppad within none []
+ invoke void @f()
+ to label %cleanup.ret unwind label %unreachable.unwind
+cleanup.ret:
+ ; This cleanupret should be rewritten to unreachable,
+ ; and merged into the pred block.
+ cleanupret from %cleanup unwind label %unreachable.unwind
+exit:
+ ret void
+unreachable.unwind:
+ cleanuppad within none []
+ unreachable
+}
+
+; CHECK-LABEL: define void @test5()
+define void @test5() personality i8* bitcast (void ()* @Personality to i8*) {
+entry:
+ invoke void @f()
+ to label %exit unwind label %catch.pad
+
+catch.pad:
+ %cs1 = catchswitch within none [label %catch.body] unwind to caller
+
+catch.body:
+ %catch = catchpad within %cs1 []
+ catchret from %catch to label %exit
+
+exit:
+ unreachable
+}
diff --git a/test/Transforms/Sink/catchswitch.ll b/test/Transforms/Sink/catchswitch.ll
new file mode 100644
index 000000000000..2648f85f3eb4
--- /dev/null
+++ b/test/Transforms/Sink/catchswitch.ll
@@ -0,0 +1,37 @@
+; RUN: opt -sink -S < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+define void @h() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %call = call i32 @g(i32 1) readnone
+ invoke void @_CxxThrowException(i8* null, i8* null) noreturn
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %cs = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %cp = catchpad within %cs [i8* null, i32 64, i8* null]
+ catchret from %cp to label %try.cont
+
+try.cont: ; preds = %catch
+ call void @k(i32 %call)
+ ret void
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare i32 @g(i32) readnone
+
+declare void @k(i32)
+
+; CHECK-LABEL: define void @h(
+; CHECK: call i32 @g(i32 1)
+; CHECK-NEXT: invoke void @_CxxThrowException(
diff --git a/test/Transforms/Sink/landingpad.ll b/test/Transforms/Sink/landingpad.ll
new file mode 100644
index 000000000000..10548fd5b7d4
--- /dev/null
+++ b/test/Transforms/Sink/landingpad.ll
@@ -0,0 +1,33 @@
+; Test that we don't sink landingpads
+; RUN: opt -sink -S < %s | FileCheck %s
+
+declare hidden void @g()
+declare void @h()
+declare i32 @__gxx_personality_v0(...)
+
+define void @f() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ invoke void @g()
+ to label %invoke.cont.15 unwind label %lpad
+
+invoke.cont.15:
+ unreachable
+
+; CHECK: lpad:
+; CHECK: %0 = landingpad { i8*, i32 }
+lpad:
+ %0 = landingpad { i8*, i32 }
+ catch i8* null
+ invoke void @h()
+ to label %invoke.cont unwind label %lpad.1
+
+; CHECK: invoke.cont
+; CHECK-NOT: %0 = landingpad { i8*, i32 }
+invoke.cont:
+ ret void
+
+lpad.1:
+ %1 = landingpad { i8*, i32 }
+ cleanup
+ resume { i8*, i32 } %1
+}
diff --git a/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll b/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
index 278250a9c80e..f2853aca698f 100644
--- a/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
+++ b/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll
@@ -57,10 +57,10 @@ bb:
; CHECK-LABEL: @slsr_after_reassociate_lds_geps_ds_max_offset(
; CHECK: [[B1:%[0-9]+]] = getelementptr float, float addrspace(3)* %arr, i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B1]], i32 16383
+; CHECK: getelementptr inbounds float, float addrspace(3)* [[B1]], i32 16383
; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i
-; CHECK: getelementptr float, float addrspace(3)* [[B2]], i32 16383
+; CHECK: getelementptr inbounds float, float addrspace(3)* [[B2]], i32 16383
define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) {
bb:
%i2 = shl nsw i32 %i, 1
diff --git a/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll b/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll
new file mode 100644
index 000000000000..cb73565b152e
--- /dev/null
+++ b/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; CUDA code
+; __global__ void foo(int b, int s) {
+; #pragma unroll
+; for (int i = 0; i < 4; ++i) {
+; if (cond(i))
+; use((b + i) * s);
+; }
+; }
+define void @foo(i32 %b, i32 %s) {
+; CHECK-LABEL: .visible .entry foo(
+entry:
+; CHECK: ld.param.u32 [[s:%r[0-9]+]], [foo_param_1];
+; CHECK: ld.param.u32 [[b:%r[0-9]+]], [foo_param_0];
+ %call = tail call zeroext i1 @cond(i32 0)
+ br i1 %call, label %if.then, label %for.inc
+
+if.then: ; preds = %entry
+ %mul = mul nsw i32 %b, %s
+; CHECK: mul.lo.s32 [[a0:%r[0-9]+]], [[b]], [[s]]
+ tail call void @use(i32 %mul)
+ br label %for.inc
+
+for.inc: ; preds = %entry, %if.then
+ %call.1 = tail call zeroext i1 @cond(i32 1)
+ br i1 %call.1, label %if.then.1, label %for.inc.1
+
+if.then.1: ; preds = %for.inc
+ %add.1 = add nsw i32 %b, 1
+ %mul.1 = mul nsw i32 %add.1, %s
+; CHECK: add.s32 [[a1:%r[0-9]+]], [[a0]], [[s]]
+ tail call void @use(i32 %mul.1)
+ br label %for.inc.1
+
+for.inc.1: ; preds = %if.then.1, %for.inc
+ %call.2 = tail call zeroext i1 @cond(i32 2)
+ br i1 %call.2, label %if.then.2, label %for.inc.2
+
+if.then.2: ; preds = %for.inc.1
+ %add.2 = add nsw i32 %b, 2
+ %mul.2 = mul nsw i32 %add.2, %s
+; CHECK: add.s32 [[a2:%r[0-9]+]], [[a1]], [[s]]
+ tail call void @use(i32 %mul.2)
+ br label %for.inc.2
+
+for.inc.2: ; preds = %if.then.2, %for.inc.1
+ %call.3 = tail call zeroext i1 @cond(i32 3)
+ br i1 %call.3, label %if.then.3, label %for.inc.3
+
+if.then.3: ; preds = %for.inc.2
+ %add.3 = add nsw i32 %b, 3
+ %mul.3 = mul nsw i32 %add.3, %s
+; CHECK: add.s32 [[a3:%r[0-9]+]], [[a2]], [[s]]
+ tail call void @use(i32 %mul.3)
+ br label %for.inc.3
+
+for.inc.3: ; preds = %if.then.3, %for.inc.2
+ ret void
+}
+
+declare zeroext i1 @cond(i32)
+
+declare void @use(i32)
+
+!nvvm.annotations = !{!0}
+
+!0 = !{void (i32, i32)* @foo, !"kernel", i32 1}
diff --git a/test/Transforms/StripDeadPrototypes/basic.ll b/test/Transforms/StripDeadPrototypes/basic.ll
new file mode 100644
index 000000000000..6845faf7d03e
--- /dev/null
+++ b/test/Transforms/StripDeadPrototypes/basic.ll
@@ -0,0 +1,12 @@
+; RUN: opt -strip-dead-prototypes -S -o - < %s | FileCheck %s
+; RUN: opt -S -passes=strip-dead-prototypes < %s | FileCheck %s
+
+; CHECK: declare i32 @f
+declare i32 @f()
+; CHECK-NOT: declare i32 @g
+declare i32 @g()
+
+define i32 @foo() {
+ %call = call i32 @f()
+ ret i32 %call
+}
diff --git a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
index 3b1fd74b9813..32d7e77b20df 100644
--- a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
+++ b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
@@ -4,7 +4,7 @@
@x = common global i32 0 ; <i32*> [#uses=0]
-define void @foo() nounwind readnone optsize ssp {
+define void @foo() nounwind readnone optsize ssp !dbg !0 {
entry:
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !5, metadata !{}), !dbg !10
ret void, !dbg !11
@@ -18,12 +18,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.lv.foo = !{!5}
!llvm.dbg.gv = !{!8}
-!0 = !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !12, scope: !1, type: !3, function: void ()* @foo)
+!0 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !12, scope: !1, type: !3)
!1 = !DIFile(filename: "b.c", directory: "/tmp")
-!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !12, enums: !4, retainedTypes: !4)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !12)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
-!5 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 3, scope: !6, file: !1, type: !7)
+!5 = !DILocalVariable(name: "y", line: 3, scope: !6, file: !1, type: !7)
!6 = distinct !DILexicalBlock(line: 2, column: 0, file: !12, scope: !0)
!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !DIGlobalVariable(name: "x", line: 1, isLocal: false, isDefinition: true, scope: !1, file: !1, type: !7, variable: i32* @x)
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
index f77ed11d912c..ba8979c9772f 100644
--- a/test/Transforms/StripSymbols/2010-08-25-crash.ll
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -1,5 +1,5 @@
; RUN: opt -strip-dead-debug-info -disable-output < %s
-define i32 @foo() nounwind ssp {
+define i32 @foo() nounwind ssp !dbg !0 {
entry:
ret i32 0, !dbg !8
}
@@ -7,9 +7,9 @@ entry:
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!14}
-!0 = !DISubprogram(name: "foo", linkageName: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !10, scope: !1, type: !3, function: i32 ()* @foo)
+!0 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !10, scope: !1, type: !3)
!1 = !DIFile(filename: "/tmp/a.c", directory: "/Volumes/Lalgate/clean/D.CW")
-!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.8 (trunk 112062)", isOptimized: true, emissionKind: 1, file: !10, enums: !11, retainedTypes: !11, subprograms: !12, globals: !13)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.8 (trunk 112062)", isOptimized: true, emissionKind: 1, file: !10, enums: !11, retainedTypes: !11, subprograms: !12, globals: !13)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/StripSymbols/strip-dead-debug-info.ll b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
index 08eff003dfca..39038c955617 100644
--- a/test/Transforms/StripSymbols/strip-dead-debug-info.ll
+++ b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
@@ -10,13 +10,13 @@
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #0
; Function Attrs: nounwind readnone ssp
-define i32 @fn() #1 {
+define i32 @fn() #1 !dbg !6 {
entry:
ret i32 0, !dbg !18
}
; Function Attrs: nounwind readonly ssp
-define i32 @foo(i32 %i) #2 {
+define i32 @foo(i32 %i) #2 !dbg !10 {
entry:
tail call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !15, metadata !DIExpression()), !dbg !20
%.0 = load i32, i32* @xyz, align 4
@@ -30,22 +30,22 @@ attributes #2 = { nounwind readonly ssp }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!25}
-!0 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !1, enums: !{}, retainedTypes: !{}, subprograms: !23, globals: !24)
+!0 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !1, enums: !{}, retainedTypes: !{}, subprograms: !23, globals: !24)
!1 = !DIFile(filename: "g.c", directory: "/tmp/")
!2 = !{null}
-!3 = !DISubprogram(name: "bar", line: 5, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !4)
+!3 = distinct !DISubprogram(name: "bar", line: 5, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !4)
!4 = !DISubroutineType(types: !2)
!5 = !DIFile(filename: "g.c", directory: "/tmp/")
-!6 = !DISubprogram(name: "fn", linkageName: "fn", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !7, function: i32 ()* @fn)
+!6 = distinct !DISubprogram(name: "fn", linkageName: "fn", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !7)
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = !DISubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !11, function: i32 (i32)* @foo)
+!10 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !11)
!11 = !DISubroutineType(types: !12)
!12 = !{!9, !9}
-!13 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "bb", line: 5, scope: !14, file: !5, type: !9)
+!13 = !DILocalVariable(name: "bb", line: 5, scope: !14, file: !5, type: !9)
!14 = distinct !DILexicalBlock(line: 5, column: 0, file: !1, scope: !3)
-!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 7, arg: 0, scope: !10, file: !5, type: !9)
+!15 = !DILocalVariable(name: "i", line: 7, arg: 1, scope: !10, file: !5, type: !9)
!16 = !DIGlobalVariable(name: "abcd", line: 2, isLocal: true, isDefinition: true, scope: !5, file: !5, type: !9)
!17 = !DIGlobalVariable(name: "xyz", line: 3, isLocal: false, isDefinition: true, scope: !5, file: !5, type: !9, variable: i32* @xyz)
!18 = !DILocation(line: 6, scope: !19)
diff --git a/test/Transforms/StructurizeCFG/nested-loop-order.ll b/test/Transforms/StructurizeCFG/nested-loop-order.ll
index fee1ff0433b5..8a506c3e3962 100644
--- a/test/Transforms/StructurizeCFG/nested-loop-order.ll
+++ b/test/Transforms/StructurizeCFG/nested-loop-order.ll
@@ -41,7 +41,7 @@ ENDIF: ; preds = %LOOP
br i1 %tmp31, label %IF29, label %ENDIF28
; CHECK: Flow:
-; CHECK br i1 %{{[0-9]+}}, label %Flow, label %LOOP
+; CHECK: br i1 %{{[0-9]+}}, label %Flow2, label %LOOP
; CHECK: IF29:
; CHECK: br label %Flow1
diff --git a/test/Transforms/TailCallElim/basic.ll b/test/Transforms/TailCallElim/basic.ll
index 2488b552d8f3..b303fa743ca9 100644
--- a/test/Transforms/TailCallElim/basic.ll
+++ b/test/Transforms/TailCallElim/basic.ll
@@ -156,7 +156,7 @@ define void @test9(i32* byval %a) {
declare void @ctor(%struct.X*)
define void @test10(%struct.X* noalias sret %agg.result, i1 zeroext %b) {
-; CHECK-LABEL @test10
+; CHECK-LABEL: @test10
entry:
%x = alloca %struct.X, align 8
br i1 %b, label %if.then, label %if.end
@@ -188,3 +188,13 @@ define void @test11() {
; CHECK: call void @test11_helper2
ret void
}
+
+; PR25928
+define void @test12() {
+entry:
+; CHECK-LABEL: @test12
+; CHECK: {{^ *}} call void undef(i8* undef) [ "foo"(i8* %e) ]
+ %e = alloca i8
+ call void undef(i8* undef) [ "foo"(i8* %e) ]
+ unreachable
+}
diff --git a/test/Transforms/TailCallElim/notail.ll b/test/Transforms/TailCallElim/notail.ll
new file mode 100644
index 000000000000..e6fdbd1ec77d
--- /dev/null
+++ b/test/Transforms/TailCallElim/notail.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
+
+; CHECK: tail call void @callee0()
+; CHECK: notail call void @callee1()
+
+define void @foo1(i32 %a) {
+entry:
+ %tobool = icmp eq i32 %a, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then:
+ call void @callee0()
+ br label %if.end
+
+if.else:
+ notail call void @callee1()
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+declare void @callee0()
+declare void @callee1()
diff --git a/test/Transforms/Util/lowerswitch.ll b/test/Transforms/Util/lowerswitch.ll
index 6e21f916c038..1eddb43c1a06 100644
--- a/test/Transforms/Util/lowerswitch.ll
+++ b/test/Transforms/Util/lowerswitch.ll
@@ -3,7 +3,7 @@
; Test that we don't crash and have a different basic block for each incoming edge.
define void @test0() {
; CHECK-LABEL: @test0
-; CHECK: %merge = phi i64 [ 1, %BB3 ], [ 0, %NewDefault ], [ 0, %NodeBlock.5 ], [ 0, %LeafBlock.1 ]
+; CHECK: %merge = phi i64 [ 1, %BB3 ], [ 0, %NewDefault ], [ 0, %NodeBlock5 ], [ 0, %LeafBlock1 ]
BB1:
switch i32 undef, label %BB2 [
i32 3, label %BB2
@@ -43,9 +43,9 @@ bb2:
bb3:
; CHECK-LABEL: bb3
-; CHECK: %tmp = phi i32 [ 1, %NodeBlock ], [ 0, %bb2 ], [ 1, %LeafBlock.3 ]
+; CHECK: %tmp = phi i32 [ 1, %NodeBlock ], [ 0, %bb2 ], [ 1, %LeafBlock3 ]
%tmp = phi i32 [ 1, %bb1 ], [ 0, %bb2 ], [ 1, %bb1 ], [ 1, %bb1 ]
-; CHECK-NEXT: %tmp2 = phi i32 [ 2, %NodeBlock ], [ 5, %bb2 ], [ 2, %LeafBlock.3 ]
+; CHECK-NEXT: %tmp2 = phi i32 [ 2, %NodeBlock ], [ 5, %bb2 ], [ 2, %LeafBlock3 ]
%tmp2 = phi i32 [ 2, %bb1 ], [ 2, %bb1 ], [ 5, %bb2 ], [ 2, %bb1 ]
br label %exit
diff --git a/test/Transforms/Util/simplify-dbg-declare-load.ll b/test/Transforms/Util/simplify-dbg-declare-load.ll
new file mode 100644
index 000000000000..0357a5e6facb
--- /dev/null
+++ b/test/Transforms/Util/simplify-dbg-declare-load.ll
@@ -0,0 +1,52 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+%foo = type { i64, i32, i32 }
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #0
+
+; Function Attrs: sspreq
+define void @julia_fastshortest_6256() #1 {
+top:
+ %cp = alloca %foo, align 8
+ call void @llvm.dbg.declare(metadata %foo* %cp, metadata !1, metadata !16), !dbg !17
+ br i1 undef, label %idxend, label %fail
+
+fail: ; preds = %top
+ unreachable
+
+idxend: ; preds = %top
+; CHECK-NOT: call void @llvm.dbg.value(metadata %foo* %cp, i64 0, metadata !1, metadata !16), !dbg !17
+ %0 = load volatile %foo, %foo* %cp, align 8
+; CHECK: call void @llvm.dbg.value(metadata %foo %0, i64 0, metadata !1, metadata !16), !dbg !17
+ store volatile %foo %0, %foo* undef, align 8
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { sspreq }
+
+!llvm.module.flags = !{!0}
+!llvm.dbg.cu = !{}
+
+!0 = !{i32 1, !"Debug Info Version", i32 3}
+!1 = !DILocalVariable(name: "cp", scope: !2, file: !3, line: 106, type: !12)
+!2 = distinct !DISubprogram(name: "fastshortest", linkageName: "julia_fastshortest_6256", scope: null, file: !3, type: !4, isLocal: false, isDefinition: true, isOptimized: true, variables: !11)
+!3 = !DIFile(filename: "grisu/fastshortest.jl", directory: ".")
+!4 = !DISubroutineType(types: !5)
+!5 = !{!6, !7}
+!6 = !DIBasicType(name: "Float64", size: 64, align: 64, encoding: DW_ATE_unsigned)
+!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64, align: 64)
+!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "jl_value_t", file: !9, line: 71, align: 64, elements: !10)
+!9 = !DIFile(filename: "julia.h", directory: "")
+!10 = !{!7}
+!11 = !{}
+!12 = !DICompositeType(tag: DW_TAG_structure_type, name: "Float", size: 128, align: 64, elements: !13, runtimeLang: DW_LANG_Julia)
+!13 = !{!14, !15, !15}
+!14 = !DIBasicType(name: "UInt64", size: 64, align: 64, encoding: DW_ATE_unsigned)
+!15 = !DIBasicType(name: "Int32", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!16 = !DIExpression()
+!17 = !DILocation(line: 106, scope: !2)