author    Dimitry Andric <dim@FreeBSD.org>    2017-06-26 20:32:52 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2017-06-26 20:32:52 +0000
commit    08bbd35a80bf7765fe0d3043f9eb5a2f2786b649 (patch)
tree      80108f0f128657f8623f8f66ad9735b4d88e7b47 /test/Transforms
parent    7c7aba6e5fef47a01a136be655b0a92cfd7090f6 (diff)
Diffstat (limited to 'test/Transforms')
-rw-r--r--  test/Transforms/CodeGenPrepare/X86/memcmp.ll | 156
-rw-r--r--  test/Transforms/CorrelatedValuePropagation/add.ll | 95
-rw-r--r--  test/Transforms/GVN/PRE/phi-translate-2.ll | 131
-rw-r--r--  test/Transforms/GVN/PRE/pre-gep-load.ll | 2
-rw-r--r--  test/Transforms/GVN/PRE/pre-load.ll | 6
-rw-r--r--  test/Transforms/IndVarSimplify/huge_muls.ll | 87
-rw-r--r--  test/Transforms/InferFunctionAttrs/annotate.ll | 7
-rw-r--r--  test/Transforms/Inline/AArch64/switch.ll | 37
-rw-r--r--  test/Transforms/Inline/inline-probe-stack.ll | 20
-rw-r--r--  test/Transforms/Inline/inline-stack-probe-size.ll | 29
-rw-r--r--  test/Transforms/InstCombine/add.ll | 12
-rw-r--r--  test/Transforms/InstCombine/and-or-not.ll | 48
-rw-r--r--  test/Transforms/InstCombine/bitcast-bigendian.ll | 18
-rw-r--r--  test/Transforms/InstCombine/bitcast.ll | 26
-rw-r--r--  test/Transforms/InstCombine/compare-3way.ll | 395
-rw-r--r--  test/Transforms/InstCombine/ctpop.ll | 39
-rw-r--r--  test/Transforms/InstCombine/early_constfold_changes_IR.ll | 20
-rw-r--r--  test/Transforms/InstCombine/early_dce_clobbers_callgraph.ll | 31
-rw-r--r--  test/Transforms/InstCombine/icmp-xor-signbit.ll | 21
-rw-r--r--  test/Transforms/InstCombine/intrinsics.ll | 41
-rw-r--r--  test/Transforms/InstCombine/logical-select.ll | 4
-rw-r--r--  test/Transforms/InstCombine/memcpy-from-global.ll | 31
-rw-r--r--  test/Transforms/InstCombine/or-xor.ll | 4
-rw-r--r--  test/Transforms/InstCombine/phi-select-constant.ll | 29
-rw-r--r--  test/Transforms/InstCombine/pr33453.ll | 15
-rw-r--r--  test/Transforms/InstCombine/select-with-bitwise-ops.ll | 78
-rw-r--r--  test/Transforms/InstCombine/select.ll | 4
-rw-r--r--  test/Transforms/InstCombine/set.ll | 22
-rw-r--r--  test/Transforms/InstCombine/xor2.ll | 4
-rw-r--r--  test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll | 73
-rw-r--r--  test/Transforms/InterleavedAccess/X86/interleavedStore.ll | 17
-rw-r--r--  test/Transforms/LICM/strlen.ll | 19
-rw-r--r--  test/Transforms/LoadCombine/deadcode.ll | 39
-rw-r--r--  test/Transforms/LoadCombine/load-combine-aa.ll | 63
-rw-r--r--  test/Transforms/LoadCombine/load-combine-assume.ll | 44
-rw-r--r--  test/Transforms/LoadCombine/load-combine-negativegep.ll | 19
-rw-r--r--  test/Transforms/LoadCombine/load-combine.ll | 190
-rw-r--r--  test/Transforms/LoopDeletion/unreachable-loops.ll | 76
-rwxr-xr-x  test/Transforms/LoopRotate/catchret.ll | 41
-rw-r--r--  test/Transforms/LoopSimplify/basictest.ll | 243
-rw-r--r--  test/Transforms/LoopStrengthReduce/X86/bin_power.ll | 264
-rw-r--r--  test/Transforms/LoopStrengthReduce/X86/canonical.ll | 2
-rw-r--r--  test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll | 4
-rw-r--r--  test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll | 12
-rw-r--r--  test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll | 4
-rw-r--r--  test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll | 4
-rw-r--r--  test/Transforms/LoopStrengthReduce/X86/nested-loop.ll | 22
-rw-r--r--  test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll | 1
-rw-r--r--  test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll | 1
-rw-r--r--  test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll | 1
-rw-r--r--  test/Transforms/LoopVectorize/AMDGPU/packed-math.ll | 34
-rw-r--r--  test/Transforms/LoopVectorize/X86/small-size.ll | 26
-rw-r--r--  test/Transforms/LoopVectorize/tripcount.ll | 91
-rw-r--r--  test/Transforms/LowerTypeTests/export-icall.ll | 9
-rw-r--r--  test/Transforms/NewGVN/pr33461.ll | 36
-rw-r--r--  test/Transforms/PGOProfile/counter_promo.ll | 68
-rw-r--r--  test/Transforms/PGOProfile/counter_promo_exit_merge.ll | 74
-rw-r--r--  test/Transforms/PGOProfile/counter_promo_mexits.ll | 80
-rw-r--r--  test/Transforms/PGOProfile/memop_size_from_strlen.ll | 14
-rw-r--r--  test/Transforms/Reassociate/fast-ReassociateVector.ll | 18
-rw-r--r--  test/Transforms/Reassociate/xor_reassoc.ll | 101
-rw-r--r--  test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll | 195
-rw-r--r--  test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll | 70
-rw-r--r--  test/Transforms/SROA/non-integral-pointers.ll | 46
-rw-r--r--  test/Transforms/SampleProfile/Inputs/einline.prof | 3
-rw-r--r--  test/Transforms/SampleProfile/early-inline.ll | 16
-rw-r--r--  test/Transforms/TailCallElim/reorder_load.ll | 27
67 files changed, 2788 insertions, 671 deletions
diff --git a/test/Transforms/CodeGenPrepare/X86/memcmp.ll b/test/Transforms/CodeGenPrepare/X86/memcmp.ll
index 328e8cc2907f2..690e714af2610 100644
--- a/test/Transforms/CodeGenPrepare/X86/memcmp.ll
+++ b/test/Transforms/CodeGenPrepare/X86/memcmp.ll
@@ -1,14 +1,50 @@
-; RUN: opt -S -codegenprepare -mtriple=i686-unknown-unknown < %s | FileCheck %s --check-prefix=ALL --check-prefix=X32
-; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+; RUN: opt -S -codegenprepare -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X32
+; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s | FileCheck %s --check-prefix=ALL --check-prefix=X64
declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)
define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp2(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp2(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i16*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i16*
+; X32-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
+; X32-NEXT: [[TMP8:%.*]] = sub i32 [[TMP6]], [[TMP7]]
+; X32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+; X32-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
+; X32: res_block:
+; X32-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]]
+; X32-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
+; X32-NEXT: br label %endblock
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp2(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i16*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i16*
+; X64-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i64
+; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i64
+; X64-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
+; X64-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
+; X64: res_block:
+; X64-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
+; X64-NEXT: br label %endblock
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
ret i32 %call
@@ -24,9 +60,45 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp4(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp4(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0
+; X32-NEXT: br i1 [[TMP7]], label %res_block, label %endblock
+; X32: res_block:
+; X32-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
+; X32-NEXT: br label %endblock
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP9]], %res_block ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp4(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i32*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i32*
+; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
+; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
+; X64-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
+; X64-NEXT: br i1 [[TMP9]], label %res_block, label %endblock
+; X64: res_block:
+; X64-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1
+; X64-NEXT: br label %endblock
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
ret i32 %call
@@ -60,9 +132,28 @@ define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp8(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp8(
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+; X32-NEXT: ret i32 [[CALL]]
+;
+; X64-LABEL: @cmp8(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* %x to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %y to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; X64-NEXT: br i1 [[TMP7]], label %res_block, label %endblock
+; X64: res_block:
+; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1
+; X64-NEXT: br label %endblock
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP9]], %res_block ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
ret i32 %call
@@ -142,8 +233,13 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq2(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i16*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i16*
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
+; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
+; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; ALL-NEXT: ret i32 [[CONV]]
;
@@ -168,8 +264,13 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq4(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i32*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i32*
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
+; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; ALL-NEXT: ret i32 [[CONV]]
;
@@ -219,11 +320,22 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq8(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq8(
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64-LABEL: @cmp_eq8(
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* %x to i64*
+; X64-NEXT: [[TMP2:%.*]] = bitcast i8* %y to i64*
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
+; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
%cmp = icmp eq i32 %call, 0
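
The expansions above are easiest to read as scalar code: memcmp orders memory lexicographically, i.e. as big-endian integers, so on a little-endian host (both RUN lines target x86) each loaded chunk is byte-swapped before the widening compare. A minimal C++ sketch of the @cmp2 expansion; the function name and the memcpy/__builtin_bswap16 spelling are illustrative, not from the commit:

#include <cstdint>
#include <cstring>

// Scalar equivalent of the expanded @cmp2: loadbb loads and byte-swaps both
// 16-bit chunks; res_block turns "which is smaller?" into -1/1; endblock
// contributes the 0 for the equal case via the phi.
int cmp2_expanded(const unsigned char *x, const unsigned char *y) {
  uint16_t a, b;
  std::memcpy(&a, x, 2);
  std::memcpy(&b, y, 2);
  a = __builtin_bswap16(a);   // lexicographic order == big-endian integer order
  b = __builtin_bswap16(b);
  if (a == b) return 0;       // endblock phi: 0 from %loadbb
  return a < b ? -1 : 1;      // res_block: icmp ult + select
}
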
diff --git a/test/Transforms/CorrelatedValuePropagation/add.ll b/test/Transforms/CorrelatedValuePropagation/add.ll
index 0ba521c894e2f..b07330aa0f262 100644
--- a/test/Transforms/CorrelatedValuePropagation/add.ll
+++ b/test/Transforms/CorrelatedValuePropagation/add.ll
@@ -212,3 +212,98 @@ then:
else:
ret i32 0
}
+
+; Check that we can gather information for conditions in the form of
+; or ( i s>= 100, Unknown )
+; CHECK-LABEL: @test12(
+define void @test12(i32 %a, i1 %flag) {
+entry:
+ %cmp.1 = icmp sge i32 %a, 100
+ %cmp = or i1 %cmp.1, %flag
+ br i1 %cmp, label %exit, label %bb
+
+bb:
+; CHECK: %add = add nsw i32 %a, 1
+ %add = add i32 %a, 1
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Check that we can gather information for conditions in the form of
+; or ( i s>= 100, i s<= 0 )
+; CHECK-LABEL: @test13(
+define void @test13(i32 %a) {
+entry:
+ %cmp.1 = icmp sge i32 %a, 100
+ %cmp.2 = icmp sle i32 %a, 0
+ %cmp = or i1 %cmp.1, %cmp.2
+ br i1 %cmp, label %exit, label %bb
+
+bb:
+; CHECK: %add = add nuw nsw i32 %a, 1
+ %add = add i32 %a, 1
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Check that for conditions in the form of cond1 || cond2 we don't mistakenly
+; assume that cond1 || cond2 holds down the true path.
+; CHECK-LABEL: @test13_neg(
+define void @test13_neg(i32 %a) {
+entry:
+ %cmp.1 = icmp slt i32 %a, 100
+ %cmp.2 = icmp sgt i32 %a, 0
+ %cmp = or i1 %cmp.1, %cmp.2
+ br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %add = add i32 %a, 1
+ %add = add i32 %a, 1
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Check that we can gather information for conditions in the form of
+; or ( i s>= 100, or ( i s<= 0, Unknown ) )
+; CHECK-LABEL: @test14(
+define void @test14(i32 %a, i1 %flag) {
+entry:
+ %cmp.1 = icmp sge i32 %a, 100
+ %cmp.2 = icmp sle i32 %a, 0
+ %cmp.3 = or i1 %cmp.2, %flag
+ %cmp = or i1 %cmp.1, %cmp.3
+ br i1 %cmp, label %exit, label %bb
+
+bb:
+; CHECK: %add = add nuw nsw i32 %a, 1
+ %add = add i32 %a, 1
+ br label %exit
+
+exit:
+ ret void
+}
+
+; Check that we can gather information for conditions in the form of
+; or ( i s>= Unknown, ... )
+; CHECK-LABEL: @test15(
+define void @test15(i32 %a, i32 %b, i1 %flag) {
+entry:
+ %cmp.1 = icmp sge i32 %a, %b
+ %cmp = or i1 %cmp.1, %flag
+ br i1 %cmp, label %exit, label %bb
+
+bb:
+; CHECK: %add = add nsw i32 %a, 1
+ %add = add i32 %a, 1
+ br label %exit
+
+exit:
+ ret void
+}
+
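
These tests are easiest to justify with interval arithmetic: on the false edge of br i1 (or (a s>= 100), (a s<= 0)), both disjuncts are known false, so %a lies in [1, 99] and the increment can wrap neither as signed nor as unsigned arithmetic. A small C++ sketch of that reasoning, assuming nothing beyond the branch condition (names are illustrative):

#include <cassert>

// Mirrors @test13: on the path where (a >= 100 || a <= 0) is false,
// a is confined to [1, 99], so a + 1 neither signed-wraps (nsw) nor
// unsigned-wraps (nuw).
void increment_on_false_edge(int a) {
  if (a >= 100 || a <= 0)
    return;                    // the %exit path: nothing is known here
  assert(a >= 1 && a <= 99);   // range implied by the false edge
  int b = a + 1;               // result in [2, 100]: no wrap in either sense
  (void)b;
}
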
diff --git a/test/Transforms/GVN/PRE/phi-translate-2.ll b/test/Transforms/GVN/PRE/phi-translate-2.ll
new file mode 100644
index 0000000000000..78681e20df5e1
--- /dev/null
+++ b/test/Transforms/GVN/PRE/phi-translate-2.ll
@@ -0,0 +1,131 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@a = common global [100 x i64] zeroinitializer, align 16
+@b = common global [100 x i64] zeroinitializer, align 16
+@g1 = common global i64 0, align 8
+@g2 = common global i64 0, align 8
+@g3 = common global i64 0, align 8
+declare i64 @goo(...) local_unnamed_addr #1
+
+define void @test1(i64 %a, i64 %b, i64 %c, i64 %d) {
+entry:
+ %mul = mul nsw i64 %b, %a
+ store i64 %mul, i64* @g1, align 8
+ %t0 = load i64, i64* @g2, align 8
+ %cmp = icmp sgt i64 %t0, 3
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %mul2 = mul nsw i64 %d, %c
+ store i64 %mul2, i64* @g2, align 8
+ br label %if.end
+
+; Check that phi-translate works and the redundant mul is removed.
+; CHECK-LABEL: @test1(
+; CHECK: if.end:
+; CHECK: %[[MULPHI:.*]] = phi i64 [ {{.*}}, %if.then ], [ %mul, %entry ]
+; CHECK-NOT: = mul
+; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8
+if.end: ; preds = %if.then, %entry
+ %b.addr.0 = phi i64 [ %d, %if.then ], [ %b, %entry ]
+ %a.addr.0 = phi i64 [ %c, %if.then ], [ %a, %entry ]
+ %mul3 = mul nsw i64 %a.addr.0, %b.addr.0
+ store i64 %mul3, i64* @g3, align 8
+ ret void
+}
+
+define void @test2(i64 %i) {
+entry:
+ %arrayidx = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i
+ %t0 = load i64, i64* %arrayidx, align 8
+ %arrayidx1 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i
+ %t1 = load i64, i64* %arrayidx1, align 8
+ %mul = mul nsw i64 %t1, %t0
+ store i64 %mul, i64* @g1, align 8
+ %cmp = icmp sgt i64 %mul, 3
+ br i1 %cmp, label %if.then, label %if.end
+
+; Check that phi-translate works for the phi generated by load PRE. A new mul
+; will be inserted in the if.then block.
+; CHECK-LABEL: @test2(
+; CHECK: if.then:
+; CHECK: %[[MUL_THEN:.*]] = mul
+; CHECK: br label %if.end
+if.then: ; preds = %entry
+ %call = tail call i64 (...) @goo() #2
+ store i64 %call, i64* @g2, align 8
+ br label %if.end
+
+; CHECK: if.end:
+; CHECK: %[[MULPHI:.*]] = phi i64 [ %[[MUL_THEN]], %if.then ], [ %mul, %entry ]
+; CHECK-NOT: = mul
+; CHECK: store i64 %[[MULPHI]], i64* @g3, align 8
+if.end: ; preds = %if.then, %entry
+ %i.addr.0 = phi i64 [ 3, %if.then ], [ %i, %entry ]
+ %arrayidx3 = getelementptr inbounds [100 x i64], [100 x i64]* @a, i64 0, i64 %i.addr.0
+ %t2 = load i64, i64* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds [100 x i64], [100 x i64]* @b, i64 0, i64 %i.addr.0
+ %t3 = load i64, i64* %arrayidx4, align 8
+ %mul5 = mul nsw i64 %t3, %t2
+ store i64 %mul5, i64* @g3, align 8
+ ret void
+}
+
+; Check that phi-translate doesn't go through the backedge, which may lead to
+; an incorrect PRE transformation.
+; CHECK: for.end:
+; CHECK-NOT: %{{.*pre-phi}} = phi
+; CHECK: ret void
+define void @test3(i64 %N, i64* nocapture readonly %a) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i64 [ 0, %entry ], [ %add, %for.body ]
+ %add = add nuw nsw i64 %i.0, 1
+ %arrayidx = getelementptr inbounds i64, i64* %a, i64 %add
+ %tmp0 = load i64, i64* %arrayidx, align 8
+ %cmp = icmp slt i64 %i.0, %N
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %call = tail call i64 (...) @goo() #2
+ %add1 = sub nsw i64 0, %call
+ %tobool = icmp eq i64 %tmp0, %add1
+ br i1 %tobool, label %for.cond, label %for.end
+
+for.end: ; preds = %for.body, %for.cond
+ %i.0.lcssa = phi i64 [ %i.0, %for.body ], [ %i.0, %for.cond ]
+ %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %i.0.lcssa
+ %tmp1 = load i64, i64* %arrayidx2, align 8
+ store i64 %tmp1, i64* @g1, align 8
+ ret void
+}
+
+; It is incorrect to use the value of %andres from the last loop iteration
+; to do PRE.
+; CHECK-LABEL: @test4(
+; CHECK: for.body:
+; CHECK-NOT: %andres.pre-phi = phi i32
+; CHECK: br i1 %tobool1
+
+define i32 @test4(i32 %cond, i32 %SectionAttrs.0231.ph, i32 *%AttrFlag) {
+for.body.preheader:
+ %t514 = load volatile i32, i32* %AttrFlag
+ br label %for.body
+
+for.body:
+ %t320 = phi i32 [ %t334, %bb343 ], [ %t514, %for.body.preheader ]
+ %andres = and i32 %t320, %SectionAttrs.0231.ph
+ %tobool1 = icmp eq i32 %andres, 0
+ br i1 %tobool1, label %bb343, label %critedge.loopexit
+
+bb343:
+ %t334 = load volatile i32, i32* %AttrFlag
+ %tobool2 = icmp eq i32 %cond, 0
+ br i1 %tobool2, label %critedge.loopexit, label %for.body
+
+critedge.loopexit:
+ unreachable
+}
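
Phi-translation is what lets GVN see that %mul3 in @test1 is available on both edges: substituting the phi operands turns mul %a.addr.0, %b.addr.0 into mul %b, %a (the existing %mul) on the %entry edge and into mul %d, %c (the existing %mul2) on the %if.then edge. A simplified source-level sketch of the transformation, with globals standing in for the stores (a rough equivalence, not the pass's literal output):

long g1, g2, g3;

// Before GVN (mirrors @test1): the product feeding g3 is redundant with a
// product already computed on each path into the join block.
void before(long a, long b, long c, long d) {
  g1 = b * a;
  if (g2 > 3) { g2 = d * c; a = c; b = d; }
  g3 = b * a;   // phi-translates to b*a or d*c -- both already available
}

// After PRE with phi-translation: a phi of the two available products,
// and the third multiply is gone.
void after(long a, long b, long c, long d) {
  long m = g1 = b * a;
  if (g2 > 3) m = g2 = d * c;
  g3 = m;
}
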
diff --git a/test/Transforms/GVN/PRE/pre-gep-load.ll b/test/Transforms/GVN/PRE/pre-gep-load.ll
index 9eec8bb6455b4..1b2b4d20d31da 100644
--- a/test/Transforms/GVN/PRE/pre-gep-load.ll
+++ b/test/Transforms/GVN/PRE/pre-gep-load.ll
@@ -37,7 +37,7 @@ sw.bb2: ; preds = %if.end, %entry
%3 = load double, double* %arrayidx5, align 8
; CHECK: sw.bb2:
; CHECK-NOT: sext
-; CHECK-NEXT: phi double [
+; CHECK: phi double [
; CHECK-NOT: load
%sub6 = fsub double 3.000000e+00, %3
br label %return
diff --git a/test/Transforms/GVN/PRE/pre-load.ll b/test/Transforms/GVN/PRE/pre-load.ll
index 685df24f62b65..ffff2b7f08e53 100644
--- a/test/Transforms/GVN/PRE/pre-load.ll
+++ b/test/Transforms/GVN/PRE/pre-load.ll
@@ -72,7 +72,7 @@ block4:
%PRE = load i32, i32* %P3
ret i32 %PRE
; CHECK: block4:
-; CHECK-NEXT: phi i32 [
+; CHECK: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}
@@ -104,7 +104,7 @@ block4:
%PRE = load i32, i32* %P3
ret i32 %PRE
; CHECK: block4:
-; CHECK-NEXT: phi i32 [
+; CHECK: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}
@@ -263,7 +263,7 @@ block4:
%PRE = load i32, i32* %P3
ret i32 %PRE
; CHECK: block4:
-; CHECK-NEXT: phi i32 [
+; CHECK: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}
diff --git a/test/Transforms/IndVarSimplify/huge_muls.ll b/test/Transforms/IndVarSimplify/huge_muls.ll
new file mode 100644
index 0000000000000..92722ca3ce0dd
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/huge_muls.ll
@@ -0,0 +1,87 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; This test takes an excessively long time if SCEV tries to construct huge
+; SCEVMulExpr's (with ~1000 ops) due to non-linear analysis cost.
+define i32 @test() {
+; CHECK-LABEL: @test(
+bci_0:
+ br label %bci_12
+
+bci_133: ; preds = %bci_127.unr-lcssa
+ ret i32 %tmp17
+
+bci_12: ; preds = %bci_127.unr-lcssa, %bci_0
+ %indvars.iv184 = phi i64 [ %indvars.iv.next185, %bci_127.unr-lcssa ], [ 3, %bci_0 ]
+ %tmp1 = trunc i64 %indvars.iv184 to i32
+ br label %bci_55.postloop
+
+bci_127.unr-lcssa: ; preds = %bci_90.postloop
+ %indvars.iv.next185 = add nuw nsw i64 %indvars.iv184, 1
+ %tmp4 = icmp sgt i64 %indvars.iv184, 91
+ br i1 %tmp4, label %bci_133, label %bci_12
+
+bci_55.postloop: ; preds = %bci_90.postloop, %bci_12
+ %indvars.iv180.postloop = phi i64 [ %indvars.iv.next181.postloop, %bci_90.postloop ], [ 15, %bci_12 ]
+ %local_2_16.postloop = phi i32 [ %tmp17, %bci_90.postloop ], [ 4, %bci_12 ]
+ %indvars.iv.next181.postloop = add nuw nsw i64 %indvars.iv180.postloop, 1
+ %tmp6 = load i32, i32 addrspace(1)* undef, align 4
+ %tmp7 = mul i32 %tmp6, %tmp1
+ br label %not_zero65.us.postloop
+
+not_zero65.us.postloop: ; preds = %not_zero65.us.postloop.1, %bci_55.postloop
+ %local_2_24.us.postloop = phi i32 [ %local_2_16.postloop, %bci_55.postloop ], [ %tmp49, %not_zero65.us.postloop.1 ]
+ %local_6_.us.postloop = phi i32 [ 3, %bci_55.postloop ], [ %tmp50, %not_zero65.us.postloop.1 ]
+ %tmp8 = mul i32 %tmp7, %local_2_24.us.postloop
+ %tmp9 = mul i32 %tmp8, %local_2_24.us.postloop
+ %tmp10 = mul i32 %tmp7, %tmp9
+ %tmp11 = mul i32 %tmp10, %tmp9
+ %tmp12 = mul i32 %tmp7, %tmp11
+ %tmp13 = mul i32 %tmp12, %tmp11
+ %tmp14 = mul i32 %tmp7, %tmp13
+ %tmp15 = mul i32 %tmp14, %tmp13
+ %tmp16 = mul i32 %tmp7, %tmp15
+ %tmp17 = mul i32 %tmp16, %tmp15
+ %tmp18 = icmp sgt i32 %local_6_.us.postloop, 82
+ br i1 %tmp18, label %bci_90.postloop, label %not_zero65.us.postloop.1
+
+bci_90.postloop: ; preds = %not_zero65.us.postloop
+ %tmp19 = icmp sgt i64 %indvars.iv180.postloop, 68
+ br i1 %tmp19, label %bci_127.unr-lcssa, label %bci_55.postloop
+
+not_zero65.us.postloop.1: ; preds = %not_zero65.us.postloop
+ %tmp20 = mul i32 %tmp7, %tmp17
+ %tmp21 = mul i32 %tmp20, %tmp17
+ %tmp22 = mul i32 %tmp7, %tmp21
+ %tmp23 = mul i32 %tmp22, %tmp21
+ %tmp24 = mul i32 %tmp7, %tmp23
+ %tmp25 = mul i32 %tmp24, %tmp23
+ %tmp26 = mul i32 %tmp7, %tmp25
+ %tmp27 = mul i32 %tmp26, %tmp25
+ %tmp28 = mul i32 %tmp7, %tmp27
+ %tmp29 = mul i32 %tmp28, %tmp27
+ %tmp30 = mul i32 %tmp7, %tmp29
+ %tmp31 = mul i32 %tmp30, %tmp29
+ %tmp32 = mul i32 %tmp7, %tmp31
+ %tmp33 = mul i32 %tmp32, %tmp31
+ %tmp34 = mul i32 %tmp7, %tmp33
+ %tmp35 = mul i32 %tmp34, %tmp33
+ %tmp36 = mul i32 %tmp7, %tmp35
+ %tmp37 = mul i32 %tmp36, %tmp35
+ %tmp38 = mul i32 %tmp7, %tmp37
+ %tmp39 = mul i32 %tmp38, %tmp37
+ %tmp40 = mul i32 %tmp7, %tmp39
+ %tmp41 = mul i32 %tmp40, %tmp39
+ %tmp42 = mul i32 %tmp7, %tmp41
+ %tmp43 = mul i32 %tmp42, %tmp41
+ %tmp44 = mul i32 %tmp7, %tmp43
+ %tmp45 = mul i32 %tmp44, %tmp43
+ %tmp46 = mul i32 %tmp7, %tmp45
+ %tmp47 = mul i32 %tmp46, %tmp45
+ %tmp48 = mul i32 %tmp7, %tmp47
+ %tmp49 = mul i32 %tmp48, %tmp47
+ %tmp50 = add nsw i32 %local_6_.us.postloop, 20
+ br label %not_zero65.us.postloop
+}
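
The shape of the blowup is worth spelling out: each pair of multiplies in the chain computes next = base * cur * cur, so the polynomial degree of the value (and with it the operand count of a flattened SCEVMulExpr) obeys deg' = 1 + 2*deg and grows exponentially in the number of chained pairs. A quick degree-counting sketch (illustrative model only, not SCEV code):

#include <cstdio>

// One 'tmpN = mul(base, cur); cur = mul(tmpN, cur)' pair maps
// deg(cur) to deg(base) + 2*deg(cur), with degree-1 inputs.
int main() {
  long deg = 1;
  for (int pair = 1; pair <= 10; ++pair) {
    deg = 1 + 2 * deg;   // 3, 7, 15, ... = 2^(pair+1) - 1
    std::printf("after %2d mul pairs: degree %ld\n", pair, deg);
  }
  return 0;              // ~10 pairs already exceed 1000 operands
}
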
diff --git a/test/Transforms/InferFunctionAttrs/annotate.ll b/test/Transforms/InferFunctionAttrs/annotate.ll
index cb4b5cdd1e8cf..80ac8f99edc04 100644
--- a/test/Transforms/InferFunctionAttrs/annotate.ll
+++ b/test/Transforms/InferFunctionAttrs/annotate.ll
@@ -844,7 +844,7 @@ declare i64 @strcspn(i8*, i8*)
; CHECK: declare noalias i8* @strdup(i8* nocapture readonly) [[G0]]
declare i8* @strdup(i8*)
-; CHECK: declare i64 @strlen(i8* nocapture) [[G1]]
+; CHECK: declare i64 @strlen(i8* nocapture) [[G2:#[0-9]+]]
declare i64 @strlen(i8*)
; CHECK: declare i32 @strncasecmp(i8* nocapture, i8* nocapture, i64) [[G1]]
@@ -996,10 +996,11 @@ declare i64 @write(i32, i8*, i64)
; memset_pattern16 isn't available everywhere.
-; CHECK-DARWIN: declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) [[G2:#[0-9]+]]
+; CHECK-DARWIN: declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) [[G3:#[0-9]+]]
declare void @memset_pattern16(i8*, i8*, i64)
; CHECK: attributes [[G0]] = { nounwind }
; CHECK: attributes [[G1]] = { nounwind readonly }
-; CHECK-DARWIN: attributes [[G2]] = { argmemonly }
+; CHECK: attributes [[G2]] = { argmemonly nounwind readonly }
+; CHECK-DARWIN: attributes [[G3]] = { argmemonly }
diff --git a/test/Transforms/Inline/AArch64/switch.ll b/test/Transforms/Inline/AArch64/switch.ll
index a530ba7347054..154956e2b7581 100644
--- a/test/Transforms/Inline/AArch64/switch.ll
+++ b/test/Transforms/Inline/AArch64/switch.ll
@@ -121,3 +121,40 @@ define i32 @caller_jumptable(i32 %a, i32 %b, i32* %P) {
ret i32 %r
}
+
+define internal i32 @callee_negativeCost(i32 %t) {
+entry:
+ switch i32 %t, label %sw.default [
+ i32 1, label %sw.bb
+ i32 0, label %sw.bb1
+ i32 42, label %sw.bb2
+ i32 43, label %sw.bb3
+ ]
+
+sw.bb: ; preds = %entry
+ br label %cleanup
+
+sw.bb1: ; preds = %entry
+ br label %cleanup
+
+sw.bb2: ; preds = %entry
+ br label %cleanup
+
+sw.bb3: ; preds = %entry
+ br label %cleanup
+
+sw.default: ; preds = %entry
+ br label %cleanup
+
+cleanup: ; preds = %sw.default, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
+ %retval.0 = phi i32 [ 1, %sw.default ], [ 3, %sw.bb3 ], [ 2, %sw.bb2 ], [ 0, %sw.bb1 ], [ 0, %sw.bb ]
+ ret i32 %retval.0
+}
+
+define i32 @caller_negativeCost(i32 %t) {
+; CHECK-LABEL: @caller_negativeCost(
+; CHECK-NOT: call i32 @callee_negativeCost
+entry:
+ %call = call i32 @callee_negativeCost(i32 %t)
+ ret i32 %call
+}
diff --git a/test/Transforms/Inline/inline-probe-stack.ll b/test/Transforms/Inline/inline-probe-stack.ll
new file mode 100644
index 0000000000000..bddee16d30b7b
--- /dev/null
+++ b/test/Transforms/Inline/inline-probe-stack.ll
@@ -0,0 +1,20 @@
+; RUN: opt %s -inline -S | FileCheck %s
+
+define internal void @inner() "probe-stack"="__probestackinner" {
+ ret void
+}
+
+define void @outerNoAttribute() {
+ call void @inner()
+ ret void
+}
+
+define void @outerConflictingAttribute() "probe-stack"="__probestackouter" {
+ call void @inner()
+ ret void
+}
+
+; CHECK: define void @outerNoAttribute() #0
+; CHECK: define void @outerConflictingAttribute() #1
+; CHECK: attributes #0 = { "probe-stack"="__probestackinner" }
+; CHECK: attributes #1 = { "probe-stack"="__probestackouter" }
diff --git a/test/Transforms/Inline/inline-stack-probe-size.ll b/test/Transforms/Inline/inline-stack-probe-size.ll
new file mode 100644
index 0000000000000..d24da462d2ef8
--- /dev/null
+++ b/test/Transforms/Inline/inline-stack-probe-size.ll
@@ -0,0 +1,29 @@
+; RUN: opt %s -inline -S | FileCheck %s
+
+define internal void @innerSmall() "stack-probe-size"="4096" {
+ ret void
+}
+
+define internal void @innerLarge() "stack-probe-size"="8192" {
+ ret void
+}
+
+define void @outerNoAttribute() {
+ call void @innerSmall()
+ ret void
+}
+
+define void @outerConflictingAttributeSmall() "stack-probe-size"="4096" {
+ call void @innerLarge()
+ ret void
+}
+
+define void @outerConflictingAttributeLarge() "stack-probe-size"="8192" {
+ call void @innerSmall()
+ ret void
+}
+
+; CHECK: define void @outerNoAttribute() #0
+; CHECK: define void @outerConflictingAttributeSmall() #0
+; CHECK: define void @outerConflictingAttributeLarge() #0
+; CHECK: attributes #0 = { "stack-probe-size"="4096" }
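
Taken together, the two new inline tests pin down how these string attributes appear to merge during inlining: "probe-stack" is inherited when the caller lacks it and the caller's value wins on a conflict, while a "stack-probe-size" conflict resolves to the smaller value, since probes must be at least as frequent as either body assumed. A hedged sketch of those rules as inferred from the CHECK lines (hypothetical helpers, not the LLVM API):

#include <algorithm>
#include <cstdint>
#include <optional>
#include <string>

// "probe-stack": missing means inherit the callee's; on a conflict the
// caller keeps its own probe function (@outerConflictingAttribute).
std::optional<std::string> mergeProbeStack(std::optional<std::string> caller,
                                           std::optional<std::string> callee) {
  return caller ? caller : callee;
}

// "stack-probe-size": missing means inherit; a conflict takes the minimum,
// which is why every caller above ends up with 4096.
std::optional<uint64_t> mergeStackProbeSize(std::optional<uint64_t> caller,
                                            std::optional<uint64_t> callee) {
  if (!caller) return callee;
  if (!callee) return caller;
  return std::min(*caller, *callee);
}
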
diff --git a/test/Transforms/InstCombine/add.ll b/test/Transforms/InstCombine/add.ll
index 5f7101e8feca0..9cc2ae4fcb094 100644
--- a/test/Transforms/InstCombine/add.ll
+++ b/test/Transforms/InstCombine/add.ll
@@ -1,12 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
-; TODO: This should be canonicalized to either a select or xor+zext.
-
define i32 @select_0_or_1_from_bool(i1 %x) {
; CHECK-LABEL: @select_0_or_1_from_bool(
-; CHECK-NEXT: [[EXT:%.*]] = sext i1 %x to i32
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[EXT]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = xor i1 %x, true
+; CHECK-NEXT: [[ADD:%.*]] = zext i1 [[TMP1]] to i32
; CHECK-NEXT: ret i32 [[ADD]]
;
%ext = sext i1 %x to i32
@@ -14,12 +12,10 @@ define i32 @select_0_or_1_from_bool(i1 %x) {
ret i32 %add
}
-; TODO: This should be canonicalized to either a select or xor+zext.
-
define <2 x i32> @select_0_or_1_from_bool_vec(<2 x i1> %x) {
; CHECK-LABEL: @select_0_or_1_from_bool_vec(
-; CHECK-NEXT: [[EXT:%.*]] = sext <2 x i1> %x to <2 x i32>
-; CHECK-NEXT: [[ADD:%.*]] = add nsw <2 x i32> [[EXT]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i1> %x, <i1 true, i1 true>
+; CHECK-NEXT: [[ADD:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
; CHECK-NEXT: ret <2 x i32> [[ADD]]
;
%ext = sext <2 x i1> %x to <2 x i32>
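
The new canonical form follows from i1 arithmetic: sext i1 %x is 0 or -1, so adding 1 yields 1 exactly when %x is false, which is zext (xor %x, true). An exhaustive two-case check of the identity:

#include <cassert>

int main() {
  for (bool x : {false, true}) {
    int via_sext = (x ? -1 : 0) + 1;  // old form: add (sext i1 x), 1
    int via_zext = (int)!x;           // new form: zext (xor i1 x, true)
    assert(via_sext == via_zext);
  }
  return 0;
}
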
diff --git a/test/Transforms/InstCombine/and-or-not.ll b/test/Transforms/InstCombine/and-or-not.ll
index a8e32bd77f7f3..28881668ca899 100644
--- a/test/Transforms/InstCombine/and-or-not.ll
+++ b/test/Transforms/InstCombine/and-or-not.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; PR1510
@@ -516,3 +517,50 @@ define i64 @PR32830(i64 %a, i64 %b, i64 %c) {
ret i64 %and
}
+; (~a | b) & (~b | a) --> ~(a ^ b)
+; TODO: this increases the instruction count if the pieces have additional users
+define i32 @and_to_nxor_multiuse(float %fa, float %fb) {
+; CHECK-LABEL: @and_to_nxor_multiuse(
+; CHECK-NEXT: [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT: [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT: [[NOTA:%.*]] = xor i32 [[A]], -1
+; CHECK-NEXT: [[NOTB:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[NOTA]], [[B]]
+; CHECK-NEXT: [[OR2:%.*]] = or i32 [[NOTB]], [[A]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR1]], [[OR2]]
+; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[OR1]], [[OR2]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[MUL1]], [[AND]]
+; CHECK-NEXT: ret i32 [[MUL2]]
+;
+ %a = fptosi float %fa to i32
+ %b = fptosi float %fb to i32
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %or1 = or i32 %nota, %b
+ %or2 = or i32 %notb, %a
+ %and = and i32 %or1, %or2
+ %mul1 = mul i32 %or1, %or2 ; here to increase the use count of the inputs to the and
+ %mul2 = mul i32 %mul1, %and
+ ret i32 %mul2
+}
+
+; (a & b) | ~(a | b) --> ~(a ^ b)
+; TODO: this increases the instruction count if the pieces have additional users
+define i32 @or_to_nxor_multiuse(i32 %a, i32 %b) {
+; CHECK-LABEL: @or_to_nxor_multiuse(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A]], [[B]]
+; CHECK-NEXT: [[NOTOR:%.*]] = xor i32 [[OR]], -1
+; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND]], [[NOTOR]]
+; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[AND]], [[NOTOR]]
+; CHECK-NEXT: [[MUL2:%.*]] = mul i32 [[MUL1]], [[OR2]]
+; CHECK-NEXT: ret i32 [[MUL2]]
+;
+ %and = and i32 %a, %b
+ %or = or i32 %a, %b
+ %notor = xor i32 %or, -1
+ %or2 = or i32 %and, %notor
+ %mul1 = mul i32 %and, %notor ; here to increase the use count of the inputs to the or
+ %mul2 = mul i32 %mul1, %or2
+ ret i32 %mul2
+}
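
Both patterns are two-instruction spellings of "not xor": (~a | b) & (~b | a) and (a & b) | ~(a | b) are each true exactly in the bit positions where a and b agree. The multiuse variants check that InstCombine leaves the pattern alone when the intermediates have other users, since folding would then add an instruction rather than remove one. A brute-force check of both identities over all 8-bit pairs:

#include <cassert>
#include <cstdint>

int main() {
  for (int a = 0; a < 256; ++a)
    for (int b = 0; b < 256; ++b) {
      uint8_t A = (uint8_t)a, B = (uint8_t)b;
      uint8_t nxor = (uint8_t)~(A ^ B);
      assert((uint8_t)((~A | B) & (~B | A)) == nxor);  // and_to_nxor pattern
      assert((uint8_t)((A & B) | ~(A | B)) == nxor);   // or_to_nxor pattern
    }
  return 0;
}
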
diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll
index e940f0fcec75e..0001fab8c16ae 100644
--- a/test/Transforms/InstCombine/bitcast-bigendian.ll
+++ b/test/Transforms/InstCombine/bitcast-bigendian.ll
@@ -92,12 +92,12 @@ define <2 x float> @test6(float %A){
ret <2 x float> %tmp35
}
-; Verify that 'xor' of vector and constant is done as a vector bitwise op before the bitcast.
+; No change. Bitcasts are canonicalized above bitwise logic.
define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
; CHECK-LABEL: @xor_bitcast_vec_to_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = xor <1 x i64> [[A:%.*]], <i64 4294967298>
-; CHECK-NEXT: [[T2:%.*]] = bitcast <1 x i64> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: [[T1:%.*]] = bitcast <1 x i64> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[T2:%.*]] = xor <2 x i32> [[T1]], <i32 1, i32 2>
; CHECK-NEXT: ret <2 x i32> [[T2]]
;
%t1 = bitcast <1 x i64> %a to <2 x i32>
@@ -105,12 +105,12 @@ define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
ret <2 x i32> %t2
}
-; Verify that 'and' of integer and constant is done as a vector bitwise op before the bitcast.
+; No change. Bitcasts are canonicalized above bitwise logic.
define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
; CHECK-LABEL: @and_bitcast_vec_to_int(
-; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 0, i32 3>
-; CHECK-NEXT: [[T2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; CHECK-NEXT: [[T1:%.*]] = bitcast <2 x i32> [[A:%.*]] to i64
+; CHECK-NEXT: [[T2:%.*]] = and i64 [[T1]], 3
; CHECK-NEXT: ret i64 [[T2]]
;
%t1 = bitcast <2 x i32> %a to i64
@@ -118,12 +118,12 @@ define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
ret i64 %t2
}
-; Verify that 'or' of vector and constant is done as an integer bitwise op before the bitcast.
+; No change. Bitcasts are canonicalized above bitwise logic.
define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
; CHECK-LABEL: @or_bitcast_int_to_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[A:%.*]], 4294967298
-; CHECK-NEXT: [[T2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
+; CHECK-NEXT: [[T1:%.*]] = bitcast i64 [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[T2:%.*]] = or <2 x i32> [[T1]], <i32 1, i32 2>
; CHECK-NEXT: ret <2 x i32> [[T2]]
;
%t1 = bitcast i64 %a to <2 x i32>
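
The constants differ between this big-endian file and the little-endian copy in bitcast.ll because hoisting the bitcast re-expresses <i32 1, i32 2> as a single i64: element 0 occupies the high half on a big-endian target (1*2^32 + 2 = 4294967298) and the low half on a little-endian one (2*2^32 + 1 = 8589934593). A quick check of both layouts; the memcpy view assumes a little-endian host:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  assert(1ULL * (1ULL << 32) + 2 == 4294967298ULL);  // big-endian layout
  assert(2ULL * (1ULL << 32) + 1 == 8589934593ULL);  // little-endian layout

  uint32_t v[2] = {1, 2};              // <2 x i32> <i32 1, i32 2>
  uint64_t bits;
  std::memcpy(&bits, v, sizeof bits);  // the bitcast to i64, on this host
  assert(bits == 8589934593ULL);       // matches the bitcast.ll constant
  return 0;
}
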
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 4cf3f27ab0143..0f0cbdb364af5 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -31,12 +31,12 @@ define <2 x i32> @xor_two_vector_bitcasts(<1 x i64> %a, <1 x i64> %b) {
ret <2 x i32> %t3
}
-; Verify that 'xor' of vector and constant is done as a vector bitwise op before the bitcast.
+; No change. Bitcasts are canonicalized above bitwise logic.
define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
; CHECK-LABEL: @xor_bitcast_vec_to_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = xor <1 x i64> [[A:%.*]], <i64 8589934593>
-; CHECK-NEXT: [[T2:%.*]] = bitcast <1 x i64> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: [[T1:%.*]] = bitcast <1 x i64> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[T2:%.*]] = xor <2 x i32> [[T1]], <i32 1, i32 2>
; CHECK-NEXT: ret <2 x i32> [[T2]]
;
%t1 = bitcast <1 x i64> %a to <2 x i32>
@@ -44,12 +44,12 @@ define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
ret <2 x i32> %t2
}
-; Verify that 'and' of integer and constant is done as a vector bitwise op before the bitcast.
+; No change. Bitcasts are canonicalized above bitwise logic.
define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
; CHECK-LABEL: @and_bitcast_vec_to_int(
-; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 3, i32 0>
-; CHECK-NEXT: [[T2:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; CHECK-NEXT: [[T1:%.*]] = bitcast <2 x i32> [[A:%.*]] to i64
+; CHECK-NEXT: [[T2:%.*]] = and i64 [[T1]], 3
; CHECK-NEXT: ret i64 [[T2]]
;
%t1 = bitcast <2 x i32> %a to i64
@@ -57,12 +57,12 @@ define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
ret i64 %t2
}
-; Verify that 'or' of vector and constant is done as an integer bitwise op before the bitcast.
+; No change. Bitcasts are canonicalized above bitwise logic.
define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
; CHECK-LABEL: @or_bitcast_int_to_vec(
-; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[A:%.*]], 8589934593
-; CHECK-NEXT: [[T2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
+; CHECK-NEXT: [[T1:%.*]] = bitcast i64 [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[T2:%.*]] = or <2 x i32> [[T1]], <i32 1, i32 2>
; CHECK-NEXT: ret <2 x i32> [[T2]]
;
%t1 = bitcast i64 %a to <2 x i32>
@@ -71,7 +71,7 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
}
; PR26702 - https://bugs.llvm.org//show_bug.cgi?id=26702
-; Bitcast is canonicalized below logic, so we can see the not-not pattern.
+; Bitcast is canonicalized above logic, so we can see the not-not pattern.
define <2 x i64> @is_negative(<4 x i32> %x) {
; CHECK-LABEL: @is_negative(
@@ -102,12 +102,12 @@ define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) {
ret <4 x i32> %bc2
}
-; Negative test: bitcasts are canonicalized below bitwise logic. No changes here.
+; Bitcasts are canonicalized above bitwise logic.
define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) {
; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant(
-; CHECK-NEXT: [[A:%.*]] = and <4 x i4> %x, <i4 0, i4 -8, i4 0, i4 -8>
-; CHECK-NEXT: [[B:%.*]] = bitcast <4 x i4> [[A]] to <2 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i4> [[X:%.*]] to <2 x i8>
+; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], <i8 -128, i8 -128>
; CHECK-NEXT: ret <2 x i8> [[B]]
;
%a = and <4 x i4> %x, <i4 0, i4 8, i4 0, i4 8>
diff --git a/test/Transforms/InstCombine/compare-3way.ll b/test/Transforms/InstCombine/compare-3way.ll
new file mode 100644
index 0000000000000..663d470df8745
--- /dev/null
+++ b/test/Transforms/InstCombine/compare-3way.ll
@@ -0,0 +1,395 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare void @use(i32)
+
+; These 18 tests exercise all combinations of signed comparison
+; for each of the three values produced by a typical
+; 3-way compare function (-1, 0, 1).
+
+define void @test_low_sgt(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_low_sgt
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sgt i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_low_slt(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_low_slt
+; CHECK: br i1 false, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp slt i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_low_sge(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_low_sge
+; CHECK: br i1 true, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sge i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_low_sle(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_low_sle
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sle i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_low_ne(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_low_ne
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp ne i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_low_eq(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_low_eq
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp eq i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_mid_sgt(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_mid_sgt
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sgt i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_mid_slt(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_mid_slt
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp slt i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_mid_sge(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_mid_sge
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sge i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_mid_sle(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_mid_sle
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sle i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_mid_ne(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_mid_ne
+; CHECK: [[TMP1:%.*]] = icmp eq i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp ne i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_mid_eq(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_mid_eq
+; CHECK: icmp eq i64 %a, %b
+; CHECK: br i1 %eq, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp eq i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_high_sgt(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_high_sgt
+; CHECK: br i1 false, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sgt i32 %result, 1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_high_slt(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_high_slt
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp slt i32 %result, 1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_high_sge(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_high_sge
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sge i32 %result, 1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_high_sle(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_high_sle
+; CHECK: br i1 true, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp sle i32 %result, 1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_high_ne(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_high_ne
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %normal, label %unreached
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp ne i32 %result, 1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @test_high_eq(i64 %a, i64 %b) {
+; CHECK-LABEL: @test_high_eq
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -1, i32 1
+ %result = select i1 %eq, i32 0, i32 %.
+ %cmp = icmp eq i32 %result, 1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+; These five tests make sure we didn't accidentally hard-code one of the
+; produced values.
+
+define void @non_standard_low(i64 %a, i64 %b) {
+; CHECK-LABEL: @non_standard_low
+; CHECK: [[TMP1:%.*]] = icmp slt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -3, i32 -1
+ %result = select i1 %eq, i32 -2, i32 %.
+ %cmp = icmp eq i32 %result, -3
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @non_standard_mid(i64 %a, i64 %b) {
+; CHECK-LABEL: @non_standard_mid
+; CHECK: icmp eq i64 %a, %b
+; CHECK: br i1 %eq, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -3, i32 -1
+ %result = select i1 %eq, i32 -2, i32 %.
+ %cmp = icmp eq i32 %result, -2
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @non_standard_high(i64 %a, i64 %b) {
+; CHECK-LABEL: @non_standard_high
+; CHECK: [[TMP1:%.*]] = icmp sgt i64 %a, %b
+; CHECK: br i1 [[TMP1]], label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -3, i32 -1
+ %result = select i1 %eq, i32 -2, i32 %.
+ %cmp = icmp eq i32 %result, -1
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @non_standard_bound1(i64 %a, i64 %b) {
+; CHECK-LABEL: @non_standard_bound1
+; CHECK: br i1 false, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -3, i32 -1
+ %result = select i1 %eq, i32 -2, i32 %.
+ %cmp = icmp eq i32 %result, -20
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
+
+define void @non_standard_bound2(i64 %a, i64 %b) {
+; CHECK-LABEL: @non_standard_bound2
+; CHECK: br i1 false, label %unreached, label %normal
+ %eq = icmp eq i64 %a, %b
+ %slt = icmp slt i64 %a, %b
+ %. = select i1 %slt, i32 -3, i32 -1
+ %result = select i1 %eq, i32 -2, i32 %.
+ %cmp = icmp eq i32 %result, 0
+ br i1 %cmp, label %unreached, label %normal
+normal:
+ ret void
+unreached:
+ call void @use(i32 %result)
+ ret void
+}
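
Every test in this file materializes the same idiom -- result = (a == b) ? 0 : (a < b ? -1 : 1) -- and then compares result against a constant. Because result can only be -1, 0, or 1, each outer icmp collapses to a single comparison of %a and %b, or to a constant for the out-of-range bounds. A source-level sketch of two representative folds (a rough equivalence sketch, not the pass's output):

// The 3-way compare idiom under test.
int cmp3way(long a, long b) {
  return (a == b) ? 0 : (a < b ? -1 : 1);
}

// @test_low_sgt: result > -1 holds exactly when a >= b, so the branch
// reduces to one icmp slt with swapped targets.
bool low_sgt_before(long a, long b) { return cmp3way(a, b) > -1; }
bool low_sgt_after(long a, long b) { return !(a < b); }

// @test_high_sgt: result > 1 is unsatisfiable, so the fold is a constant.
bool high_sgt(long a, long b) { return cmp3way(a, b) > 1; }  // always false
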
diff --git a/test/Transforms/InstCombine/ctpop.ll b/test/Transforms/InstCombine/ctpop.ll
index d49a907ffce1d..e8e3603e4cb8a 100644
--- a/test/Transforms/InstCombine/ctpop.ll
+++ b/test/Transforms/InstCombine/ctpop.ll
@@ -3,6 +3,8 @@
declare i32 @llvm.ctpop.i32(i32)
declare i8 @llvm.ctpop.i8(i8)
+declare i1 @llvm.ctpop.i1(i1)
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
declare void @llvm.assume(i1)
define i1 @test1(i32 %arg) {
@@ -44,7 +46,7 @@ define i1 @test3(i32 %arg) {
; Negative test for when we know nothing
define i1 @test4(i8 %arg) {
; CHECK-LABEL: @test4(
-; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]])
+; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[ARG:%.*]]), !range ![[RANGE:[0-9]+]]
; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 2
; CHECK-NEXT: ret i1 [[RES]]
;
@@ -55,16 +57,41 @@ define i1 @test4(i8 %arg) {
; Test when the number of possible known bits isn't one less than a power of 2
; and the compare value is greater but less than the next power of 2.
-; TODO: The icmp is unnecessary given the known bits of the input.
define i1 @test5(i32 %arg) {
; CHECK-LABEL: @test5(
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[ARG:%.*]], 3
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[AND]])
-; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 3
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: ret i1 false
;
%and = and i32 %arg, 3
%cnt = call i32 @llvm.ctpop.i32(i32 %and)
%res = icmp eq i32 %cnt, 3
ret i1 %res
}
+
+; Test when the number of possible known bits isn't one less than a power of 2
+; and the compare value is greater but less than the next power of 2.
+; TODO: The icmp is unnecessary given the known bits of the input, but range
+; metadata doesn't support vectors
+define <2 x i1> @test5vec(<2 x i32> %arg) {
+; CHECK-LABEL: @test5vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 3>
+; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[AND]])
+; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i1> [[RES]]
+;
+ %and = and <2 x i32> %arg, <i32 3, i32 3>
+ %cnt = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %and)
+ %res = icmp eq <2 x i32> %cnt, <i32 3, i32 3>
+ ret <2 x i1> %res
+}
+
+; Make sure we don't add range metadata to i1 ctpop.
+define i1 @test6(i1 %arg) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.ctpop.i1(i1 [[ARG:%.*]])
+; CHECK-NEXT: ret i1 [[CNT]]
+;
+ %cnt = call i1 @llvm.ctpop.i1(i1 %arg)
+ ret i1 %cnt
+}
+
+; CHECK: ![[RANGE]] = !{i8 0, i8 9}
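
The new !range is the half-open interval [0, 9): an i8 has at most 8 set bits. The same known-bits reasoning folds @test5, since the popcount of (arg & 3) is at most 2 and can never equal 3; the vector variant keeps its TODO only because range metadata cannot be attached to vector calls. A quick exhaustive check using the GCC/Clang popcount builtin:

#include <cassert>

int main() {
  for (unsigned v = 0; v < 256; ++v) {
    assert(__builtin_popcount(v & 0xFF) <= 8);  // justifies !range {i8 0, i8 9}
    assert(__builtin_popcount(v & 3) != 3);     // justifies @test5 -> false
  }
  return 0;
}
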
diff --git a/test/Transforms/InstCombine/early_constfold_changes_IR.ll b/test/Transforms/InstCombine/early_constfold_changes_IR.ll
new file mode 100644
index 0000000000000..18b2192348012
--- /dev/null
+++ b/test/Transforms/InstCombine/early_constfold_changes_IR.ll
@@ -0,0 +1,20 @@
+; This run line verifies that we get the expected constant fold.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This run line verifies that InstructionCombiningPass::runOnFunction reports
+; this as a modification of the IR.
+; RUN: opt < %s -instcombine -disable-output -debug-pass=Details 2>&1 | FileCheck %s --check-prefix=DETAILS
+
+define i32 @foo(i32 %arg) #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[ARG:%.*]], 7
+; CHECK-NEXT: ret i32 [[AND]]
+;
+entry:
+ %or = or i32 0, 7
+ %and = and i32 %arg, %or
+ ret i32 %and
+}
+
+; DETAILS: Made Modification 'Combine redundant instructions' on Function 'foo'
diff --git a/test/Transforms/InstCombine/early_dce_clobbers_callgraph.ll b/test/Transforms/InstCombine/early_dce_clobbers_callgraph.ll
new file mode 100644
index 0000000000000..743477621fa18
--- /dev/null
+++ b/test/Transforms/InstCombine/early_dce_clobbers_callgraph.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -inline -instcombine -S | FileCheck %s
+
+; This test case exposed a bug in instcombine where the early
+; DCE of a call wasn't recognized as changing the IR.
+; So when runOnFunction propagated its "made changes" status upwards
+; to the CallGraphSCCPass, it signalled that no changes had been
+; made, so CallGraphSCCPass assumed that the old CallGraph,
+; as known by that pass manager, was still up-to-date.
+;
+; This was detected as an assert when trying to remove the
+; no longer used function 'bar' (due to incorrect reference
+; count in the CallGraph).
+
+attributes #0 = { noinline norecurse nounwind readnone }
+
+define void @foo() #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret void
+;
+entry:
+ %call = call i32 @bar()
+ ret void
+}
+
+define internal i32 @bar() #0 {
+; CHECK-NOT: bar
+entry:
+ ret i32 42
+}
+
diff --git a/test/Transforms/InstCombine/icmp-xor-signbit.ll b/test/Transforms/InstCombine/icmp-xor-signbit.ll
index 30a9668f37df8..dab9b5e9fefe6 100644
--- a/test/Transforms/InstCombine/icmp-xor-signbit.ll
+++ b/test/Transforms/InstCombine/icmp-xor-signbit.ll
@@ -188,16 +188,13 @@ define <2 x i1> @uge_to_slt_splat(<2 x i8> %x) {
}
; PR33138, part 2: https://bugs.llvm.org/show_bug.cgi?id=33138
-; TODO: We could look through vector bitcasts for icmp folds,
-; or we could canonicalize bitcast ahead of logic ops with constants.
+; Bitcast canonicalization ensures that we recognize the signbit constant.
define <8 x i1> @sgt_to_ugt_bitcasted_splat(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @sgt_to_ugt_bitcasted_splat(
-; CHECK-NEXT: [[A:%.*]] = xor <2 x i32> %x, <i32 -2139062144, i32 -2139062144>
-; CHECK-NEXT: [[B:%.*]] = xor <2 x i32> %y, <i32 -2139062144, i32 -2139062144>
-; CHECK-NEXT: [[C:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-; CHECK-NEXT: [[D:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
-; CHECK-NEXT: [[E:%.*]] = icmp sgt <8 x i8> [[C]], [[D]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> %x to <8 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> %y to <8 x i8>
+; CHECK-NEXT: [[E:%.*]] = icmp ugt <8 x i8> [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret <8 x i1> [[E]]
;
%a = xor <2 x i32> %x, <i32 2155905152, i32 2155905152> ; 0x80808080
@@ -208,17 +205,11 @@ define <8 x i1> @sgt_to_ugt_bitcasted_splat(<2 x i32> %x, <2 x i32> %y) {
ret <8 x i1> %e
}
-; TODO: This is false (little-endian). How should that be recognized?
-; Ie, should InstSimplify know this directly, should InstCombine canonicalize
-; this so InstSimplify can know this, or is that not something that we want
-; either pass to recognize?
+; Bitcast canonicalization ensures that we recognize the signbit constant.
define <2 x i1> @negative_simplify_splat(<4 x i8> %x) {
; CHECK-LABEL: @negative_simplify_splat(
-; CHECK-NEXT: [[A:%.*]] = or <4 x i8> %x, <i8 0, i8 -128, i8 0, i8 -128>
-; CHECK-NEXT: [[B:%.*]] = bitcast <4 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT: [[C:%.*]] = icmp sgt <2 x i16> [[B]], zeroinitializer
-; CHECK-NEXT: ret <2 x i1> [[C]]
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
;
%a = or <4 x i8> %x, <i8 0, i8 128, i8 0, i8 128>
%b = bitcast <4 x i8> %a to <2 x i16>
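Why the second case now simplifies all the way to a constant (a worked sketch, assuming the little-endian lane layout the old TODO worried about): the or forces the high byte of each i16 lane, so both lanes are negative and the signed compare against zero can never be true:

    %a = or <4 x i8> %x, <i8 0, i8 128, i8 0, i8 128> ; sets bit 7 of bytes 1 and 3
    %b = bitcast <4 x i8> %a to <2 x i16>             ; bytes 1 and 3 are the high bytes
    %c = icmp sgt <2 x i16> %b, zeroinitializer       ; sign bit set, so always false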
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 1b1ed606868f2..c294d79f15efe 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -16,6 +16,8 @@ declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
declare double @llvm.powi.f64(double, i32) nounwind readonly
declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i1 @llvm.cttz.i1(i1, i1) nounwind readnone
+declare i1 @llvm.ctlz.i1(i1, i1) nounwind readnone
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
@@ -293,6 +295,16 @@ define <2 x i32> @cttz_vec(<2 x i32> %a) {
ret <2 x i32> %count
}
+; Make sure we don't add range metadata to i1 cttz.
+define i1 @cttz_i1(i1 %arg) {
+; CHECK-LABEL: @cttz_i1(
+; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.cttz.i1(i1 [[ARG:%.*]], i1 false) #2
+; CHECK-NEXT: ret i1 [[CNT]]
+;
+ %cnt = call i1 @llvm.cttz.i1(i1 %arg, i1 false) nounwind readnone
+ ret i1 %cnt
+}
+
define i1 @cttz_knownbits(i32 %arg) {
; CHECK-LABEL: @cttz_knownbits(
; CHECK-NEXT: ret i1 false
@@ -316,7 +328,7 @@ define <2 x i1> @cttz_knownbits_vec(<2 x i32> %arg) {
define i1 @cttz_knownbits2(i32 %arg) {
; CHECK-LABEL: @cttz_knownbits2(
; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true)
+; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2, !range ![[CTTZ_RANGE:[0-9]+]]
; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 2
; CHECK-NEXT: ret i1 [[RES]]
;
@@ -339,13 +351,9 @@ define <2 x i1> @cttz_knownbits2_vec(<2 x i32> %arg) {
ret <2 x i1> %res
}
-; TODO: The icmp is unnecessary given the known bits of the input.
define i1 @cttz_knownbits3(i32 %arg) {
; CHECK-LABEL: @cttz_knownbits3(
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4
-; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #2
-; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 3
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: ret i1 false
;
%or = or i32 %arg, 4
%cnt = call i32 @llvm.cttz.i32(i32 %or, i1 true) nounwind readnone
@@ -387,6 +395,16 @@ define <2 x i8> @ctlz_vec(<2 x i8> %a) {
ret <2 x i8> %count
}
+; Make sure we don't add range metadata to i1 ctlz.
+define i1 @ctlz_i1(i1 %arg) {
+; CHECK-LABEL: @ctlz_i1(
+; CHECK-NEXT: [[CNT:%.*]] = call i1 @llvm.ctlz.i1(i1 [[ARG:%.*]], i1 false) #2
+; CHECK-NEXT: ret i1 [[CNT]]
+;
+ %cnt = call i1 @llvm.ctlz.i1(i1 %arg, i1 false) nounwind readnone
+ ret i1 %cnt
+}
+
define i1 @ctlz_knownbits(i8 %arg) {
; CHECK-LABEL: @ctlz_knownbits(
; CHECK-NEXT: ret i1 false
@@ -410,7 +428,7 @@ define <2 x i1> @ctlz_knownbits_vec(<2 x i8> %arg) {
define i1 @ctlz_knownbits2(i8 %arg) {
; CHECK-LABEL: @ctlz_knownbits2(
; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32
-; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true)
+; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2, !range ![[CTLZ_RANGE:[0-9]+]]
; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 2
; CHECK-NEXT: ret i1 [[RES]]
;
@@ -433,13 +451,9 @@ define <2 x i1> @ctlz_knownbits2_vec(<2 x i8> %arg) {
ret <2 x i1> %res
}
-; TODO: The icmp is unnecessary given the known bits of the input.
define i1 @ctlz_knownbits3(i8 %arg) {
; CHECK-LABEL: @ctlz_knownbits3(
-; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32
-; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #2
-; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 3
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: ret i1 false
;
%or = or i8 %arg, 32
%cnt = call i8 @llvm.ctlz.i8(i8 %or, i1 true) nounwind readnone
@@ -790,3 +804,6 @@ define void @nearbyint(double *%P) {
store volatile double %C, double* %P
ret void
}
+
+; CHECK: [[CTTZ_RANGE]] = !{i32 0, i32 3}
+; CHECK: [[CTLZ_RANGE]] = !{i8 0, i8 3}
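The new !range metadata is the known-bits result spelled out (a brief sketch for the cttz case): with bit 2 forced set by the or, there are at most two trailing zeros, so the count lies in the half-open range [0, 3), which is also why the knownbits3 compare against 3 folds to false:

    %or  = or i32 %arg, 4                                ; bit 2 is known set
    %cnt = call i32 @llvm.cttz.i32(i32 %or, i1 true), !range !0
    !0 = !{i32 0, i32 3}                                 ; %cnt is 0, 1 or 2

No such metadata is attached in the i1 tests, where the possible results already span the whole type.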
diff --git a/test/Transforms/InstCombine/logical-select.ll b/test/Transforms/InstCombine/logical-select.ll
index 7f0bd23eb8a5e..6c00dec60ed6a 100644
--- a/test/Transforms/InstCombine/logical-select.ll
+++ b/test/Transforms/InstCombine/logical-select.ll
@@ -342,8 +342,8 @@ define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i6
; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> %cmp to <4 x i32>
; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[BC1]], %a
-; CHECK-NEXT: [[NEG:%.*]] = xor <4 x i32> [[SEXT]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT: [[BC2:%.*]] = bitcast <4 x i32> [[NEG]] to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
+; CHECK-NEXT: [[BC2:%.*]] = xor <2 x i64> [[TMP1]], <i64 -1, i64 -1>
; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[BC2]], %b
; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
; CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]]
diff --git a/test/Transforms/InstCombine/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll
index 7c9384d89ba34..c8b329e94b06e 100644
--- a/test/Transforms/InstCombine/memcpy-from-global.ll
+++ b/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -204,3 +204,34 @@ define void @test9_addrspacecast() {
; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U], [2 x %U]* @H, i64 0, i64 1) to i8*))
ret void
}
+
+@bbb = local_unnamed_addr global [1000000 x i8] zeroinitializer, align 16
+@_ZL3KKK = internal unnamed_addr constant [3 x i8] c"\01\01\02", align 1
+
+; Should not replace alloca with global because of size mismatch.
+define void @test9_small_global() {
+; CHECK-LABEL: @test9_small_global(
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}@bbb,{{.*}}@_ZL3KKK,
+; CHECK: alloca [1000000 x i8]
+entry:
+ %cc = alloca [1000000 x i8], align 16
+ %cc.0..sroa_idx = getelementptr inbounds [1000000 x i8], [1000000 x i8]* %cc, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [1000000 x i8], [1000000 x i8]* %cc, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZL3KKK, i32 0, i32 0), i64 3, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([1000000 x i8], [1000000 x i8]* @bbb, i32 0, i32 0), i8* %arraydecay, i64 1000000, i32 16, i1 false)
+ ret void
+}
+
+; Should replace alloca with global as they have exactly the same size.
+define void @test10_same_global() {
+; CHECK-LABEL: @test10_same_global(
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}@bbb,{{.*}}@_ZL3KKK,{{.*}}, i64 3,
+entry:
+ %cc = alloca [3 x i8], align 1
+ %cc.0..sroa_idx = getelementptr inbounds [3 x i8], [3 x i8]* %cc, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [3 x i8], [3 x i8]* %cc, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZL3KKK, i32 0, i32 0), i64 3, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([1000000 x i8], [1000000 x i8]* @bbb, i32 0, i32 0), i8* %arraydecay, i64 3, i32 1, i1 false)
+ ret void
+}
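The distinction between the two new tests (a sketch of the rule, not the exact implementation): the alloca can only be replaced by the constant global when the memcpy from it initializes the whole alloca. The 3-byte copy into a 1000000-byte buffer leaves most of the buffer untouched, so it must stay; the 3-byte copy into a 3-byte buffer covers it completely:

    %cc = alloca [3 x i8], align 1   ; fully initialized by the 3-byte memcpy
    ; -> reads of %cc can be redirected to @_ZL3KKK and the alloca dropped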
diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll
index 485f9612376ad..af62c2dd4ba32 100644
--- a/test/Transforms/InstCombine/or-xor.ll
+++ b/test/Transforms/InstCombine/or-xor.ll
@@ -316,7 +316,7 @@ define i8 @test17(i8 %A, i8 %B) {
; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33
; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i8 [[XOR1]], [[XOR2]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[XOR1]], 33
; CHECK-NEXT: [[RES:%.*]] = mul i8 [[OR]], [[XOR2]]
; CHECK-NEXT: ret i8 [[RES]]
;
@@ -333,7 +333,7 @@ define i8 @test18(i8 %A, i8 %B) {
; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33
; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i8 [[XOR2]], [[XOR1]]
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[XOR1]], 33
; CHECK-NEXT: [[RES:%.*]] = mul i8 [[OR]], [[XOR2]]
; CHECK-NEXT: ret i8 [[RES]]
;
diff --git a/test/Transforms/InstCombine/phi-select-constant.ll b/test/Transforms/InstCombine/phi-select-constant.ll
index 272594d7f4f9c..83c4efb2a789e 100644
--- a/test/Transforms/InstCombine/phi-select-constant.ll
+++ b/test/Transforms/InstCombine/phi-select-constant.ll
@@ -55,3 +55,32 @@ final:
%sel = select <4 x i1> %phinode, <4 x i64> zeroinitializer, <4 x i64> <i64 124, i64 125, i64 126, i64 127>
ret <4 x i64> %sel
}
+
+; Test PR33364.
+; Insert the generated select into the same block as the incoming phi value.
+; The phi has constant vectors along with a single non-constant vector as its operands.
+define <2 x i8> @vec3(i1 %cond1, i1 %cond2, <2 x i1> %x, <2 x i8> %y, <2 x i8> %z) {
+; CHECK-LABEL: @vec3
+; CHECK-LABEL: entry:
+; CHECK-NEXT: [[PHITMP1:%.*]] = shufflevector <2 x i8> %y, <2 x i8> %z, <2 x i32> <i32 2, i32 1>
+entry:
+ br i1 %cond1, label %if1, label %else
+
+; CHECK-LABEL: if1:
+; CHECK-NEXT: [[PHITMP2:%.*]] = shufflevector <2 x i8> %y, <2 x i8> %z, <2 x i32> <i32 0, i32 3>
+if1:
+ br i1 %cond2, label %if2, label %else
+
+; CHECK-LABEL: if2:
+; CHECK-NEXT: [[PHITMP3:%.*]] = select <2 x i1> %x, <2 x i8> %y, <2 x i8> %z
+if2:
+ br label %else
+
+; CHECK-LABEL: else:
+; CHECK-NEXT: [[PHITMP4:%.*]] = phi <2 x i8> [ [[PHITMP3]], %if2 ], [ [[PHITMP1]], %entry ], [ [[PHITMP2]], %if1 ]
+; CHECK-NEXT: ret <2 x i8> [[PHITMP4]]
+else:
+ %phi = phi <2 x i1> [ %x, %if2 ], [ <i1 0, i1 1>, %entry ], [ <i1 1, i1 0>, %if1 ]
+ %sel = select <2 x i1> %phi, <2 x i8> %y, <2 x i8> %z
+ ret <2 x i8> %sel
+}
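Why the %entry copy becomes that particular shuffle (a worked sketch): a select whose condition is a constant <2 x i1> mask is just a per-lane pick, so for the <i1 0, i1 1> value incoming from %entry:

    select <2 x i1> <i1 0, i1 1>, <2 x i8> %y, <2 x i8> %z
    ; lane 0: condition false, take %z[0] (shuffle index 2)
    ; lane 1: condition true,  take %y[1] (shuffle index 1)
    ; == shufflevector <2 x i8> %y, <2 x i8> %z, <2 x i32> <i32 2, i32 1>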
diff --git a/test/Transforms/InstCombine/pr33453.ll b/test/Transforms/InstCombine/pr33453.ll
new file mode 100644
index 0000000000000..dee4c5bf566be
--- /dev/null
+++ b/test/Transforms/InstCombine/pr33453.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S %s | FileCheck %s
+
+@g1 = external global i16
+@g2 = external global i16
+
+define float @patatino() {
+; CHECK-LABEL: @patatino(
+; CHECK-NEXT: ret float fmul (float uitofp (i1 icmp eq (i16* getelementptr inbounds (i16, i16* @g2, i64 1), i16* @g1) to float), float uitofp (i1 icmp eq (i16* getelementptr inbounds (i16, i16* @g2, i64 1), i16* @g1) to float))
+;
+ %call = call float @fabsf(float fmul (float uitofp (i1 icmp eq (i16* getelementptr inbounds (i16, i16* @g2, i64 1), i16* @g1) to float), float uitofp (i1 icmp eq (i16* getelementptr inbounds (i16, i16* @g2, i64 1), i16* @g1) to float)))
+ ret float %call
+}
+
+declare float @fabsf(float)
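The fold being pinned down (a sketch; %v and %sq are illustrative names): the argument to fabsf is a value multiplied by itself, which can never be ordered less than zero, so the call is dropped and its operand returned as-is:

    %sq   = fmul float %v, %v             ; x*x is never negative (NaN aside)
    %call = call float @fabsf(float %sq)  ; -> folds to %sq itself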
diff --git a/test/Transforms/InstCombine/select-with-bitwise-ops.ll b/test/Transforms/InstCombine/select-with-bitwise-ops.ll
index faeb4e046aca8..caec9412a7fd6 100644
--- a/test/Transforms/InstCombine/select-with-bitwise-ops.ll
+++ b/test/Transforms/InstCombine/select-with-bitwise-ops.ll
@@ -104,12 +104,11 @@ define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) {
define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_or_8(
-; CHECK-NEXT: [[AND:%.*]] = lshr i32 %x, 27
-; CHECK-NEXT: [[AND_TR:%.*]] = trunc i32 [[AND]] to i8
-; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[AND_TR]], 8
-; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], 8
-; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP2]], %y
-; CHECK-NEXT: ret i8 [[TMP3]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1073741824
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i8 [[Y:%.*]], 8
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i8 [[Y]], i8 [[OR]]
+; CHECK-NEXT: ret i8 [[SELECT]]
;
%and = and i32 %x, 1073741824
%cmp = icmp ne i32 0, %and
@@ -120,12 +119,11 @@ define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
define i32 @select_icmp_ne_0_and_8_or_1073741824(i8 %x, i32 %y) {
; CHECK-LABEL: @select_icmp_ne_0_and_8_or_1073741824(
-; CHECK-NEXT: [[AND:%.*]] = and i8 %x, 8
-; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[AND]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 27
-; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 1073741824
-; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], %y
-; CHECK-NEXT: ret i32 [[TMP4]]
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 8
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 1073741824
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
+; CHECK-NEXT: ret i32 [[SELECT]]
;
%and = and i8 %x, 8
%cmp = icmp ne i8 0, %and
@@ -299,11 +297,10 @@ define i32 @test67(i16 %x) {
define i32 @test68(i32 %x, i32 %y) {
; CHECK-LABEL: @test68(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[TMP1]], -1
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
-; CHECK-NEXT: ret i32 [[SELECT]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[TMP3]]
;
%and = and i32 %x, 128
%cmp = icmp eq i32 %and, 0
@@ -314,11 +311,11 @@ define i32 @test68(i32 %x, i32 %y) {
define i32 @test69(i32 %x, i32 %y) {
; CHECK-LABEL: @test69(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 0
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
-; CHECK-NEXT: ret i32 [[SELECT]]
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[TMP4]]
;
%and = and i32 %x, 128
%cmp = icmp ne i32 %and, 0
@@ -377,15 +374,13 @@ define i32 @no_shift_xor_multiuse_or(i32 %x, i32 %y) {
ret i32 %res
}
-; TODO this increased the number of instructions
define i32 @shift_xor_multiuse_or(i32 %x, i32 %y) {
; CHECK-LABEL: @shift_xor_multiuse_or(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
-; CHECK-NEXT: [[AND:%.*]] = lshr i32 [[X:%.*]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[AND]], 2048
-; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048
-; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y]]
-; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP3]], [[OR]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[OR]]
; CHECK-NEXT: ret i32 [[RES]]
;
%and = and i32 %x, 4096
@@ -452,16 +447,14 @@ define i32 @no_shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
ret i32 %res
}
-; TODO this increased the number of instructions
define i32 @shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: @shift_xor_multiuse_cmp(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[AND]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048
-; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y:%.*]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP3]], [[SELECT2]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
; CHECK-NEXT: ret i32 [[RES]]
;
%and = and i32 %x, 4096
@@ -473,16 +466,14 @@ define i32 @shift_xor_multiuse_cmp(i32 %x, i32 %y, i32 %z, i32 %w) {
ret i32 %res
}
-; TODO this increased the number of instructions
define i32 @shift_no_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: @shift_no_xor_multiuse_cmp_or(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[AND]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
; CHECK-NEXT: ret i32 [[RES2]]
;
@@ -517,16 +508,14 @@ define i32 @no_shift_no_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
ret i32 %res2
}
-; TODO this increased the number of instructions
define i32 @no_shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: @no_shift_xor_multiuse_cmp_or(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 4096
-; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[AND]], 4096
-; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[Y]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP2]], [[SELECT2]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
; CHECK-NEXT: ret i32 [[RES2]]
;
@@ -540,17 +529,14 @@ define i32 @no_shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
ret i32 %res2
}
-; TODO this increased the number of instructions
define i32 @shift_xor_multiuse_cmp_or(i32 %x, i32 %y, i32 %z, i32 %w) {
; CHECK-LABEL: @shift_xor_multiuse_cmp_or(
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 4096
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 2048
-; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[AND]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], 2048
-; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[Y]]
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i32 [[Y]], i32 [[OR]]
; CHECK-NEXT: [[SELECT2:%.*]] = select i1 [[CMP]], i32 [[Z:%.*]], i32 [[W:%.*]]
-; CHECK-NEXT: [[RES:%.*]] = mul i32 [[TMP3]], [[SELECT2]]
+; CHECK-NEXT: [[RES:%.*]] = mul i32 [[SELECT]], [[SELECT2]]
; CHECK-NEXT: [[RES2:%.*]] = mul i32 [[RES]], [[OR]]
; CHECK-NEXT: ret i32 [[RES2]]
;
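The shape behind all of these multi-use tests (a sketch): the single-use form of the fold, visible in test68 and test69 above, rewrites the select into shift-and-mask arithmetic, but once %cmp or %or has another use the select is now kept, since rewriting would only add instructions:

    %and = and i32 %x, 4096
    %cmp = icmp ne i32 %and, 0
    %or  = or i32 %y, 2048
    %sel = select i1 %cmp, i32 %y, i32 %or
    ; %cmp and %or single-use: lowered to lshr/xor/or on the tested bit
    ; extra uses of %cmp or %or: %sel stays a select (the updated CHECKs)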
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index c26380eaa71be..0f94235982b99 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -926,7 +926,7 @@ while.body:
define i32 @test76(i1 %flag, i32* %x) {
; The load here must not be speculated around the select. One side of the
-; select is trivially dereferencable but may have a lower alignment than the
+; select is trivially dereferenceable but may have a lower alignment than the
; load does.
; CHECK-LABEL: @test76(
; CHECK: store i32 0, i32* %x
@@ -943,7 +943,7 @@ declare void @scribble_on_i32(i32*)
define i32 @test77(i1 %flag, i32* %x) {
; The load here must not be speculated around the select. One side of the
-; select is trivially dereferencable but may have a lower alignment than the
+; select is trivially dereferenceable but may have a lower alignment than the
; load does.
; CHECK-LABEL: @test77(
; CHECK: %[[A:.*]] = alloca i32, align 1
diff --git a/test/Transforms/InstCombine/set.ll b/test/Transforms/InstCombine/set.ll
index db2b4c3558e81..f9f48cfa3637f 100644
--- a/test/Transforms/InstCombine/set.ll
+++ b/test/Transforms/InstCombine/set.ll
@@ -160,14 +160,12 @@ define i1 @bool_eq0(i64 %a) {
ret i1 %and
}
-; FIXME: This is equivalent to the previous test.
+; This is equivalent to the previous test.
define i1 @xor_of_icmps(i64 %a) {
; CHECK-LABEL: @xor_of_icmps(
-; CHECK-NEXT: [[B:%.*]] = icmp sgt i64 %a, 0
-; CHECK-NEXT: [[C:%.*]] = icmp eq i64 %a, 1
-; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[C]], [[B]]
-; CHECK-NEXT: ret i1 [[XOR]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 %a, 1
+; CHECK-NEXT: ret i1 [[TMP1]]
;
%b = icmp sgt i64 %a, 0
%c = icmp eq i64 %a, 1
@@ -175,14 +173,12 @@ define i1 @xor_of_icmps(i64 %a) {
ret i1 %xor
}
-; FIXME: This is also equivalent to the previous test.
+; This is also equivalent to the previous test.
define i1 @xor_of_icmps_commute(i64 %a) {
; CHECK-LABEL: @xor_of_icmps_commute(
-; CHECK-NEXT: [[B:%.*]] = icmp sgt i64 %a, 0
-; CHECK-NEXT: [[C:%.*]] = icmp eq i64 %a, 1
-; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[B]], [[C]]
-; CHECK-NEXT: ret i1 [[XOR]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 %a, 1
+; CHECK-NEXT: ret i1 [[TMP1]]
;
%b = icmp sgt i64 %a, 0
%c = icmp eq i64 %a, 1
@@ -209,10 +205,10 @@ define i1 @xor_of_icmps_folds_more(i64 %a) {
define i32 @PR2844(i32 %x) {
; CHECK-LABEL: @PR2844(
-; CHECK-NEXT: [[A:%.*]] = icmp eq i32 %x, 0
+; CHECK-NEXT: [[A:%.*]] = icmp ne i32 %x, 0
; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 %x, -638208502
-; CHECK-NEXT: [[NOT_OR:%.*]] = xor i1 [[A]], [[B]]
-; CHECK-NEXT: [[SEL:%.*]] = zext i1 [[NOT_OR]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: [[SEL:%.*]] = zext i1 [[TMP1]] to i32
; CHECK-NEXT: ret i32 [[SEL]]
;
%A = icmp eq i32 %x, 0
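Why xor can become and in PR2844 (a worked sketch): %x == 0 implies %x > -638208502, so the two compares can only disagree when %x is nonzero, and "exactly one is true" collapses to "the sgt compare holds and %x is nonzero":

    %A = icmp eq i32 %x, 0            ; %A true implies %B true
    %B = icmp sgt i32 %x, -638208502
    %X = xor i1 %A, %B                ; == and (icmp ne i32 %x, 0), %B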
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index 49e6b999fbce2..3061bdf87904c 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -330,7 +330,7 @@ define i8 @test15(i8 %A, i8 %B) {
; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33
; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]]
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR1]], [[XOR2]]
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR1]], -34
; CHECK-NEXT: [[RES:%.*]] = mul i8 [[AND]], [[XOR2]]
; CHECK-NEXT: ret i8 [[RES]]
;
@@ -347,7 +347,7 @@ define i8 @test16(i8 %A, i8 %B) {
; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[B:%.*]], [[A:%.*]]
; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A]], 33
; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[NOT]], [[B]]
-; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR2]], [[XOR1]]
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR1]], -34
; CHECK-NEXT: [[RES:%.*]] = mul i8 [[AND]], [[XOR2]]
; CHECK-NEXT: ret i8 [[RES]]
;
diff --git a/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll b/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
index 1f444b3748a5d..dcef57fc625f0 100644
--- a/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
+++ b/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll
@@ -106,8 +106,22 @@ define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x doubl
; CHECK-LABEL: @store_factorf64_4(
; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x double> [[TMP15]], <16 x double>* [[PTR:%.*]], align 16
; CHECK-NEXT: ret void
;
%s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -121,8 +135,22 @@ define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <
; CHECK-LABEL: @store_factori64_4(
; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i64> [[V2:%.*]], <4 x i64> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
-; CHECK-NEXT: store <16 x i64> [[INTERLEAVED_VEC]], <16 x i64>* [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <8 x i64> [[TMP13]], <8 x i64> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x i64> [[TMP15]], <16 x i64>* [[PTR:%.*]], align 16
; CHECK-NEXT: ret void
;
%s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -136,8 +164,22 @@ define void @store_factorf64_4_revMask(<16 x double>* %ptr, <4 x double> %v0, <4
; CHECK-LABEL: @store_factorf64_4_revMask(
; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <16 x i32> <i32 12, i32 8, i32 4, i32 0, i32 13, i32 9, i32 5, i32 1, i32 14, i32 10, i32 6, i32 2, i32 15, i32 11, i32 7, i32 3>
-; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x double> [[TMP15]], <16 x double>* [[PTR:%.*]], align 16
; CHECK-NEXT: ret void
;
%s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -151,8 +193,22 @@ define void @store_factorf64_4_arbitraryMask(<16 x double>* %ptr, <16 x double>
; CHECK-LABEL: @store_factorf64_4_arbitraryMask(
; CHECK-NEXT: [[S0:%.*]] = shufflevector <16 x double> [[V0:%.*]], <16 x double> [[V1:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x double> [[V2:%.*]], <16 x double> [[V3:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <16 x i32> <i32 4, i32 32, i32 16, i32 8, i32 5, i32 33, i32 17, i32 9, i32 6, i32 34, i32 18, i32 10, i32 7, i32 35, i32 19, i32 11>
-; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], <16 x double>* [[PTR:%.*]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x double> [[TMP15]], <16 x double>* [[PTR:%.*]], align 16
; CHECK-NEXT: ret void
;
%s0 = shufflevector <16 x double> %v0, <16 x double> %v1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
@@ -161,4 +217,3 @@ define void @store_factorf64_4_arbitraryMask(<16 x double>* %ptr, <16 x double>
store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
ret void
}
-
diff --git a/test/Transforms/InterleavedAccess/X86/interleavedStore.ll b/test/Transforms/InterleavedAccess/X86/interleavedStore.ll
new file mode 100644
index 0000000000000..e1ae199b48240
--- /dev/null
+++ b/test/Transforms/InterleavedAccess/X86/interleavedStore.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-pc-linux -mattr=+avx -mattr=+avx2 -interleaved-access -S | FileCheck %s
+
+define void @interleaved_store_vf32_i8_stride4(<32 x i8> %x1, <32 x i8> %x2, <32 x i8> %x3, <32 x i8> %x4, <128 x i8>* %p) {
+; CHECK-LABEL: @interleaved_store_vf32_i8_stride4(
+; CHECK-NEXT: [[V1:%.*]] = shufflevector <32 x i8> [[X1:%.*]], <32 x i8> [[X2:%.*]], <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; CHECK-NEXT: [[V2:%.*]] = shufflevector <32 x i8> [[X3:%.*]], <32 x i8> [[X4:%.*]], <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <64 x i8> [[V1]], <64 x i8> [[V2]], <128 x i32> <i32 0, i32 32, i32 64, i32 96, i32 1, i32 33, i32 65, i32 97, i32 2, i32 34, i32 66, i32 98, i32 3, i32 35, i32 67, i32 99, i32 4, i32 36, i32 68, i32 100, i32 5, i32 37, i32 69, i32 101, i32 6, i32 38, i32 70, i32 102, i32 7, i32 39, i32 71, i32 103, i32 8, i32 40, i32 72, i32 104, i32 9, i32 41, i32 73, i32 105, i32 10, i32 42, i32 74, i32 106, i32 11, i32 43, i32 75, i32 107, i32 12, i32 44, i32 76, i32 108, i32 13, i32 45, i32 77, i32 109, i32 14, i32 46, i32 78, i32 110, i32 15, i32 47, i32 79, i32 111, i32 16, i32 48, i32 80, i32 112, i32 17, i32 49, i32 81, i32 113, i32 18, i32 50, i32 82, i32 114, i32 19, i32 51, i32 83, i32 115, i32 20, i32 52, i32 84, i32 116, i32 21, i32 53, i32 85, i32 117, i32 22, i32 54, i32 86, i32 118, i32 23, i32 55, i32 87, i32 119, i32 24, i32 56, i32 88, i32 120, i32 25, i32 57, i32 89, i32 121, i32 26, i32 58, i32 90, i32 122, i32 27, i32 59, i32 91, i32 123, i32 28, i32 60, i32 92, i32 124, i32 29, i32 61, i32 93, i32 125, i32 30, i32 62, i32 94, i32 126, i32 31, i32 63, i32 95, i32 127>
+; CHECK-NEXT: store <128 x i8> [[INTERLEAVED_VEC]], <128 x i8>* [[P:%.*]]
+; CHECK-NEXT: ret void
+;
+ %v1 = shufflevector <32 x i8> %x1, <32 x i8> %x2, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %v2 = shufflevector <32 x i8> %x3, <32 x i8> %x4, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
+ %interleaved.vec = shufflevector <64 x i8> %v1, <64 x i8> %v2, <128 x i32> <i32 0, i32 32, i32 64, i32 96, i32 1, i32 33, i32 65, i32 97, i32 2, i32 34, i32 66, i32 98, i32 3, i32 35, i32 67, i32 99, i32 4, i32 36, i32 68, i32 100, i32 5, i32 37, i32 69, i32 101, i32 6, i32 38, i32 70, i32 102, i32 7, i32 39, i32 71, i32 103, i32 8, i32 40, i32 72, i32 104, i32 9, i32 41, i32 73, i32 105, i32 10, i32 42, i32 74, i32 106, i32 11, i32 43, i32 75, i32 107, i32 12, i32 44, i32 76, i32 108, i32 13, i32 45, i32 77, i32 109, i32 14, i32 46, i32 78, i32 110, i32 15, i32 47, i32 79, i32 111, i32 16, i32 48, i32 80, i32 112, i32 17, i32 49, i32 81, i32 113, i32 18, i32 50, i32 82, i32 114, i32 19, i32 51, i32 83, i32 115, i32 20, i32 52, i32 84, i32 116, i32 21, i32 53, i32 85, i32 117, i32 22, i32 54, i32 86, i32 118, i32 23, i32 55, i32 87, i32 119, i32 24, i32 56, i32 88, i32 120, i32 25, i32 57, i32 89, i32 121, i32 26, i32 58, i32 90, i32 122, i32 27, i32 59, i32 91, i32 123, i32 28, i32 60, i32 92, i32 124, i32 29, i32 61, i32 93, i32 125, i32 30, i32 62, i32 94, i32 126, i32 31, i32 63, i32 95, i32 127>
+ store <128 x i8> %interleaved.vec, <128 x i8>* %p
+ ret void
+}
diff --git a/test/Transforms/LICM/strlen.ll b/test/Transforms/LICM/strlen.ll
new file mode 100644
index 0000000000000..27d51c8394d63
--- /dev/null
+++ b/test/Transforms/LICM/strlen.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -inferattrs -basicaa -licm < %s | FileCheck %s
+
+define void @test(i64* noalias %loc, i8* noalias %a) {
+; CHECK-LABEL: @test
+; CHECK: @strlen
+; CHECK-LABEL: loop:
+ br label %loop
+
+loop:
+ %res = call i64 @strlen(i8* %a)
+ store i64 %res, i64* %loc
+ br label %loop
+}
+
+; CHECK: declare i64 @strlen(i8* nocapture) #0
+; CHECK: attributes #0 = { argmemonly nounwind readonly }
+declare i64 @strlen(i8*)
+
+
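What makes the hoist legal (a sketch): -inferattrs gives strlen exactly the attributes the CHECK lines spell out, and %a is noalias with the stored-through %loc, so the call only reads memory the loop never writes and LICM may lift it above the loop:

    declare i64 @strlen(i8* nocapture) #0
    attributes #0 = { argmemonly nounwind readonly }
    ; a readonly call on loop-invariant memory is safe to hoist here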
diff --git a/test/Transforms/LoadCombine/deadcode.ll b/test/Transforms/LoadCombine/deadcode.ll
deleted file mode 100644
index ed72824ffb443..0000000000000
--- a/test/Transforms/LoadCombine/deadcode.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -load-combine -S < %s | FileCheck %s
-
-; It has been detected that dead loops like the one in this test case can be
-; created by -jump-threading (it was detected by a csmith generated program).
-;
-; According to -verify this is valid input (even if it could be discussed if
-; the dead loop really satisfies SSA form).
-;
-; The problem found was that the -load-combine pass ends up in an infinite loop
-; when analysing the 'bb1' basic block.
-define void @test1() {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: ret void
-; CHECK: bb1:
-; CHECK-NEXT: [[_TMP4:%.*]] = load i16, i16* [[_TMP10:%.*]], align 1
-; CHECK-NEXT: [[_TMP10]] = getelementptr i16, i16* [[_TMP10]], i16 1
-; CHECK-NEXT: br label [[BB1:%.*]]
-; CHECK: bb2:
-; CHECK-NEXT: [[_TMP7:%.*]] = load i16, i16* [[_TMP12:%.*]], align 1
-; CHECK-NEXT: [[_TMP12]] = getelementptr i16, i16* [[_TMP12]], i16 1
-; CHECK-NEXT: br label [[BB2:%.*]]
-;
- ret void
-
-bb1:
- %_tmp4 = load i16, i16* %_tmp10, align 1
- %_tmp10 = getelementptr i16, i16* %_tmp10, i16 1
- br label %bb1
-
-; A second basic block. Running the test with -debug-pass=Executions shows
-; that we only run the Dominator Tree Construction one time for each function,
-; also when having multiple basic blocks in the function.
-bb2:
- %_tmp7 = load i16, i16* %_tmp12, align 1
- %_tmp12 = getelementptr i16, i16* %_tmp12, i16 1
- br label %bb2
-
-}
diff --git a/test/Transforms/LoadCombine/load-combine-aa.ll b/test/Transforms/LoadCombine/load-combine-aa.ll
deleted file mode 100644
index 5a577516fb47d..0000000000000
--- a/test/Transforms/LoadCombine/load-combine-aa.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; RUN: opt -basicaa -load-combine -S < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i64 @test1(i32* nocapture readonly noalias %a, i32* nocapture readonly noalias %b) {
-; CHECK-LABEL: @test1
-
-; CHECK: load i64, i64*
-; CHECK: ret i64
-
- %load1 = load i32, i32* %a, align 4
- %conv = zext i32 %load1 to i64
- %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
- store i32 %load1, i32* %b, align 4
- %load2 = load i32, i32* %arrayidx1, align 4
- %conv2 = zext i32 %load2 to i64
- %shl = shl nuw i64 %conv2, 32
- %add = or i64 %shl, %conv
- ret i64 %add
-}
-
-define i64 @test2(i32* nocapture readonly %a, i32* nocapture readonly %b) {
-; CHECK-LABEL: @test2
-
-; CHECK-NOT: load i64
-; CHECK: load i32, i32*
-; CHECK: load i32, i32*
-; CHECK: ret i64
-
- %load1 = load i32, i32* %a, align 4
- %conv = zext i32 %load1 to i64
- %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
- store i32 %load1, i32* %b, align 4
- %load2 = load i32, i32* %arrayidx1, align 4
- %conv2 = zext i32 %load2 to i64
- %shl = shl nuw i64 %conv2, 32
- %add = or i64 %shl, %conv
- ret i64 %add
-}
-
-%rec11 = type { i16, i16, i16 }
-@str = global %rec11 { i16 1, i16 2, i16 3 }
-
-; PR31517 - Check that loads which span an aliasing store are not combined.
-define i16 @test3() {
-; CHECK-LABEL: @test3
-
-; CHECK-NOT: load i32
-; CHECK: load i16, i16*
-; CHECK: store i16
-; CHECK: load i16, i16*
-; CHECK: ret i16
-
- %_tmp9 = getelementptr %rec11, %rec11* @str, i16 0, i32 1
- %_tmp10 = load i16, i16* %_tmp9
- %_tmp12 = getelementptr %rec11, %rec11* @str, i16 0, i32 0
- store i16 %_tmp10, i16* %_tmp12
- %_tmp13 = getelementptr %rec11, %rec11* @str, i16 0, i32 0
- %_tmp14 = load i16, i16* %_tmp13
- %_tmp15 = icmp eq i16 %_tmp14, 3
- %_tmp16 = select i1 %_tmp15, i16 1, i16 0
- ret i16 %_tmp16
-}
diff --git a/test/Transforms/LoadCombine/load-combine-assume.ll b/test/Transforms/LoadCombine/load-combine-assume.ll
deleted file mode 100644
index 2d6d160f12fe8..0000000000000
--- a/test/Transforms/LoadCombine/load-combine-assume.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-declare void @llvm.assume(i1) nounwind
-
-; 'load' before the 'call' gets optimized:
-define i64 @test1(i32* nocapture readonly %a, i1 %b) {
-; CHECK-LABEL: @test1
-
-; CHECK-DAG: load i64, i64* %1, align 4
-; CHECK-DAG: tail call void @llvm.assume(i1 %b)
-; CHECK: ret i64
-
- %load1 = load i32, i32* %a, align 4
- %conv = zext i32 %load1 to i64
- %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
- %load2 = load i32, i32* %arrayidx1, align 4
- tail call void @llvm.assume(i1 %b)
- %conv2 = zext i32 %load2 to i64
- %shl = shl nuw i64 %conv2, 32
- %add = or i64 %shl, %conv
- ret i64 %add
-}
-
-; 'call' before the 'load' doesn't get optimized:
-define i64 @test2(i32* nocapture readonly %a, i1 %b) {
-; CHECK-LABEL: @test2
-
-; CHECK-DAG: load i64, i64* %1, align 4
-; CHECK-DAG: tail call void @llvm.assume(i1 %b)
-; CHECK: ret i64
-
- %load1 = load i32, i32* %a, align 4
- %conv = zext i32 %load1 to i64
- %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
- tail call void @llvm.assume(i1 %b)
- %load2 = load i32, i32* %arrayidx1, align 4
- %conv2 = zext i32 %load2 to i64
- %shl = shl nuw i64 %conv2, 32
- %add = or i64 %shl, %conv
- ret i64 %add
-}
-
diff --git a/test/Transforms/LoadCombine/load-combine-negativegep.ll b/test/Transforms/LoadCombine/load-combine-negativegep.ll
deleted file mode 100644
index 7c5700b429548..0000000000000
--- a/test/Transforms/LoadCombine/load-combine-negativegep.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt -basicaa -load-combine -S < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @Load_NegGep(i32* %i){
- %1 = getelementptr inbounds i32, i32* %i, i64 -1
- %2 = load i32, i32* %1, align 4
- %3 = load i32, i32* %i, align 4
- %4 = add nsw i32 %3, %2
- ret i32 %4
-; CHECK-LABEL: @Load_NegGep(
-; CHECK: %[[load:.*]] = load i64
-; CHECK: %[[combine_extract_lo:.*]] = trunc i64 %[[load]] to i32
-; CHECK: %[[combine_extract_shift:.*]] = lshr i64 %[[load]], 32
-; CHECK: %[[combine_extract_hi:.*]] = trunc i64 %[[combine_extract_shift]] to i32
-; CHECK: %[[add:.*]] = add nsw i32 %[[combine_extract_hi]], %[[combine_extract_lo]]
-}
-
-
diff --git a/test/Transforms/LoadCombine/load-combine.ll b/test/Transforms/LoadCombine/load-combine.ll
deleted file mode 100644
index d5068787639fc..0000000000000
--- a/test/Transforms/LoadCombine/load-combine.ll
+++ /dev/null
@@ -1,190 +0,0 @@
-; RUN: opt < %s -load-combine -instcombine -S | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Combine read from char* idiom.
-define i64 @LoadU64_x64_0(i64* %pData) {
- %1 = bitcast i64* %pData to i8*
- %2 = load i8, i8* %1, align 1
- %3 = zext i8 %2 to i64
- %4 = shl nuw i64 %3, 56
- %5 = getelementptr inbounds i8, i8* %1, i64 1
- %6 = load i8, i8* %5, align 1
- %7 = zext i8 %6 to i64
- %8 = shl nuw nsw i64 %7, 48
- %9 = or i64 %8, %4
- %10 = getelementptr inbounds i8, i8* %1, i64 2
- %11 = load i8, i8* %10, align 1
- %12 = zext i8 %11 to i64
- %13 = shl nuw nsw i64 %12, 40
- %14 = or i64 %9, %13
- %15 = getelementptr inbounds i8, i8* %1, i64 3
- %16 = load i8, i8* %15, align 1
- %17 = zext i8 %16 to i64
- %18 = shl nuw nsw i64 %17, 32
- %19 = or i64 %14, %18
- %20 = getelementptr inbounds i8, i8* %1, i64 4
- %21 = load i8, i8* %20, align 1
- %22 = zext i8 %21 to i64
- %23 = shl nuw nsw i64 %22, 24
- %24 = or i64 %19, %23
- %25 = getelementptr inbounds i8, i8* %1, i64 5
- %26 = load i8, i8* %25, align 1
- %27 = zext i8 %26 to i64
- %28 = shl nuw nsw i64 %27, 16
- %29 = or i64 %24, %28
- %30 = getelementptr inbounds i8, i8* %1, i64 6
- %31 = load i8, i8* %30, align 1
- %32 = zext i8 %31 to i64
- %33 = shl nuw nsw i64 %32, 8
- %34 = or i64 %29, %33
- %35 = getelementptr inbounds i8, i8* %1, i64 7
- %36 = load i8, i8* %35, align 1
- %37 = zext i8 %36 to i64
- %38 = or i64 %34, %37
- ret i64 %38
-; CHECK-LABEL: @LoadU64_x64_0(
-; CHECK: load i64, i64* %{{.*}}, align 1
-; CHECK-NOT: load
-}
-
-; Combine simple adjacent loads.
-define i32 @"2xi16_i32"(i16* %x) {
- %1 = load i16, i16* %x, align 2
- %2 = getelementptr inbounds i16, i16* %x, i64 1
- %3 = load i16, i16* %2, align 2
- %4 = zext i16 %3 to i32
- %5 = shl nuw i32 %4, 16
- %6 = zext i16 %1 to i32
- %7 = or i32 %5, %6
- ret i32 %7
-; CHECK-LABEL: @"2xi16_i32"(
-; CHECK: load i32, i32* %{{.*}}, align 2
-; CHECK-NOT: load
-}
-
-; Don't combine loads across stores.
-define i32 @"2xi16_i32_store"(i16* %x, i16* %y) {
- %1 = load i16, i16* %x, align 2
- store i16 0, i16* %y, align 2
- %2 = getelementptr inbounds i16, i16* %x, i64 1
- %3 = load i16, i16* %2, align 2
- %4 = zext i16 %3 to i32
- %5 = shl nuw i32 %4, 16
- %6 = zext i16 %1 to i32
- %7 = or i32 %5, %6
- ret i32 %7
-; CHECK-LABEL: @"2xi16_i32_store"(
-; CHECK: load i16, i16* %{{.*}}, align 2
-; CHECK: store
-; CHECK: load i16, i16* %{{.*}}, align 2
-}
-
-; Don't combine loads with a gap.
-define i32 @"2xi16_i32_gap"(i16* %x) {
- %1 = load i16, i16* %x, align 2
- %2 = getelementptr inbounds i16, i16* %x, i64 2
- %3 = load i16, i16* %2, align 2
- %4 = zext i16 %3 to i32
- %5 = shl nuw i32 %4, 16
- %6 = zext i16 %1 to i32
- %7 = or i32 %5, %6
- ret i32 %7
-; CHECK-LABEL: @"2xi16_i32_gap"(
-; CHECK: load i16, i16* %{{.*}}, align 2
-; CHECK: load i16, i16* %{{.*}}, align 2
-}
-
-; Combine out of order loads.
-define i32 @"2xi16_i32_order"(i16* %x) {
- %1 = getelementptr inbounds i16, i16* %x, i64 1
- %2 = load i16, i16* %1, align 2
- %3 = zext i16 %2 to i32
- %4 = load i16, i16* %x, align 2
- %5 = shl nuw i32 %3, 16
- %6 = zext i16 %4 to i32
- %7 = or i32 %5, %6
- ret i32 %7
-; CHECK-LABEL: @"2xi16_i32_order"(
-; CHECK: load i32, i32* %{{.*}}, align 2
-; CHECK-NOT: load
-}
-
-; Overlapping loads.
-define i32 @"2xi16_i32_overlap"(i8* %x) {
- %1 = bitcast i8* %x to i16*
- %2 = load i16, i16* %1, align 2
- %3 = getelementptr inbounds i8, i8* %x, i64 1
- %4 = bitcast i8* %3 to i16*
- %5 = load i16, i16* %4, align 2
- %6 = zext i16 %5 to i32
- %7 = shl nuw i32 %6, 16
- %8 = zext i16 %2 to i32
- %9 = or i32 %7, %8
- ret i32 %9
-; CHECK-LABEL: @"2xi16_i32_overlap"(
-; CHECK: load i16, i16* %{{.*}}, align 2
-; CHECK: load i16, i16* %{{.*}}, align 2
-}
-
-; Combine valid alignments.
-define i64 @"2xi16_i64_align"(i8* %x) {
- %1 = bitcast i8* %x to i32*
- %2 = load i32, i32* %1, align 4
- %3 = getelementptr inbounds i8, i8* %x, i64 4
- %4 = bitcast i8* %3 to i16*
- %5 = load i16, i16* %4, align 2
- %6 = getelementptr inbounds i8, i8* %x, i64 6
- %7 = bitcast i8* %6 to i16*
- %8 = load i16, i16* %7, align 2
- %9 = zext i16 %8 to i64
- %10 = shl nuw i64 %9, 48
- %11 = zext i16 %5 to i64
- %12 = shl nuw nsw i64 %11, 32
- %13 = zext i32 %2 to i64
- %14 = or i64 %12, %13
- %15 = or i64 %14, %10
- ret i64 %15
-; CHECK-LABEL: @"2xi16_i64_align"(
-; CHECK: load i64, i64* %{{.*}}, align 4
-}
-
-; Non power of two.
-define i64 @"2xi16_i64_npo2"(i8* %x) {
- %1 = load i8, i8* %x, align 1
- %2 = zext i8 %1 to i64
- %3 = getelementptr inbounds i8, i8* %x, i64 1
- %4 = load i8, i8* %3, align 1
- %5 = zext i8 %4 to i64
- %6 = shl nuw nsw i64 %5, 8
- %7 = or i64 %6, %2
- %8 = getelementptr inbounds i8, i8* %x, i64 2
- %9 = load i8, i8* %8, align 1
- %10 = zext i8 %9 to i64
- %11 = shl nuw nsw i64 %10, 16
- %12 = or i64 %11, %7
- %13 = getelementptr inbounds i8, i8* %x, i64 3
- %14 = load i8, i8* %13, align 1
- %15 = zext i8 %14 to i64
- %16 = shl nuw nsw i64 %15, 24
- %17 = or i64 %16, %12
- %18 = getelementptr inbounds i8, i8* %x, i64 4
- %19 = load i8, i8* %18, align 1
- %20 = zext i8 %19 to i64
- %21 = shl nuw nsw i64 %20, 32
- %22 = or i64 %21, %17
- %23 = getelementptr inbounds i8, i8* %x, i64 5
- %24 = load i8, i8* %23, align 1
- %25 = zext i8 %24 to i64
- %26 = shl nuw nsw i64 %25, 40
- %27 = or i64 %26, %22
- %28 = getelementptr inbounds i8, i8* %x, i64 6
- %29 = load i8, i8* %28, align 1
- %30 = zext i8 %29 to i64
- %31 = shl nuw nsw i64 %30, 48
- %32 = or i64 %31, %27
- ret i64 %32
-; CHECK-LABEL: @"2xi16_i64_npo2"(
-; CHECK: load i32, i32* %{{.*}}, align 1
-}
diff --git a/test/Transforms/LoopDeletion/unreachable-loops.ll b/test/Transforms/LoopDeletion/unreachable-loops.ll
index 147a85670121e..5bdaeb11232a5 100644
--- a/test/Transforms/LoopDeletion/unreachable-loops.ll
+++ b/test/Transforms/LoopDeletion/unreachable-loops.ll
@@ -334,3 +334,79 @@ exit:
ret void
}
+
+; Two edges from a single exiting block to the exit block.
+define i64 @test12(i64 %n) {
+; CHECK-LABEL: @test12
+; CHECK-NOT: L1:
+; CHECK-NOT: L1Latch:
+; CHECK-LABEL: L1.preheader:
+; CHECK-NEXT: br label %exit
+; CHECK-LABEL: exit:
+; CHECK-NEXT: %y.phi = phi i64 [ undef, %L1.preheader ]
+; CHECK-NEXT: ret i64 %y.phi
+
+entry:
+ br i1 true, label %exit1, label %L1
+
+exit1:
+ ret i64 42
+
+L1: ; preds = %L1Latch, %entry
+ %y.next = phi i64 [ 0, %entry ], [ %y.add, %L1Latch ]
+ br i1 true, label %L1Latch, label %exit
+
+L1Latch: ; preds = %L1
+ %y = phi i64 [ %y.next, %L1 ]
+ %y.add = add i64 %y, %n
+ %cond2 = icmp eq i64 %y.add, 42
+ switch i64 %n, label %L1 [
+ i64 10, label %exit
+ i64 20, label %exit
+ ]
+
+exit: ; preds = %L1Latch, %L1Latch
+ %y.phi = phi i64 [ 10, %L1Latch ], [ 10, %L1Latch ], [ %y.next, %L1 ]
+ ret i64 %y.phi
+}
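+
+; The interesting shape in test12, restated (a sketch): the switch above
+; creates two distinct edges from %L1Latch to %exit, so the phi in %exit
+; carries one entry per edge and loop deletion must rewrite all of them:
+;
+;   switch i64 %n, label %L1 [
+;     i64 10, label %exit        ; first edge to %exit
+;     i64 20, label %exit        ; second edge to %exit
+;   ]
+;   %y.phi = phi i64 [ 10, %L1Latch ], [ 10, %L1Latch ], [ %y.next, %L1 ]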
+
+; Multiple edges to the exit block from the same exiting blocks.
+define i64 @test13(i64 %n) {
+; CHECK-LABEL: @test13
+; CHECK-NOT: L1:
+; CHECK-NOT: L1Latch:
+; CHECK-LABEL: L1.preheader:
+; CHECK-NEXT: br label %exit
+; CHECK-LABEL: exit:
+; CHECK-NEXT: %y.phi = phi i64 [ undef, %L1.preheader ]
+; CHECK-NEXT: ret i64 %y.phi
+
+entry:
+ br i1 true, label %exit1, label %L1
+
+exit1:
+ ret i64 42
+
+L1: ; preds = %L1Latch, %entry
+ %y.next = phi i64 [ 0, %entry ], [ %y.add, %L1Latch ]
+ br i1 true, label %L1Block, label %exit
+
+L1Block: ; preds = %L1
+ %y = phi i64 [ %y.next, %L1 ]
+ %y.add = add i64 %y, %n
+ %cond2 = icmp eq i64 %y.add, 42
+ switch i64 %n, label %L1Latch [
+ i64 10, label %exit
+ i64 20, label %exit
+ ]
+
+L1Latch:
+ switch i64 %n, label %L1 [
+ i64 30, label %exit
+ i64 40, label %exit
+ ]
+
+exit: ; preds = %L1Block, %L1, %L1Latch
+ %y.phi = phi i64 [ 10, %L1Block ], [ 10, %L1Block ], [ %y.next, %L1 ], [ 30, %L1Latch ], [ 30, %L1Latch ]
+ ret i64 %y.phi
+}
diff --git a/test/Transforms/LoopRotate/catchret.ll b/test/Transforms/LoopRotate/catchret.ll
new file mode 100755
index 0000000000000..c035e49d79c21
--- /dev/null
+++ b/test/Transforms/LoopRotate/catchret.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -loop-rotate -S | FileCheck %s
+
+target triple = "x86_64-pc-windows-msvc"
+
+declare void @always_throws()
+
+define i32 @test() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+ invoke void @always_throws()
+ to label %continue unwind label %catch.dispatch
+
+continue:
+ unreachable
+
+catch.dispatch:
+ %t0 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %t1 = catchpad within %t0 [i8* null, i32 64, i8* null]
+ catchret from %t1 to label %for.cond
+
+for.cond:
+ %sum = phi i32 [ %add, %for.body ], [ 0, %catch ]
+ %i = phi i32 [ %inc, %for.body ], [ 0, %catch ]
+ %cmp = icmp slt i32 %i, 1
+ br i1 %cmp, label %for.body, label %return
+
+for.body:
+ %add = add nsw i32 1, %sum
+ %inc = add nsw i32 %i, 1
+ br label %for.cond
+
+return:
+ ret i32 0
+}
+
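+; Loop rotation must not move the rotated loop header's instructions into the
+; catch block: the catchret has to stay immediately after the catchpad, which
+; is what the checks below verify.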
+; CHECK: catch:
+; CHECK-NEXT: catchpad
+; CHECK-NEXT: catchret
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/test/Transforms/LoopSimplify/basictest.ll b/test/Transforms/LoopSimplify/basictest.ll
index 81b2c80fae008..e5fb7b9907bd4 100644
--- a/test/Transforms/LoopSimplify/basictest.ll
+++ b/test/Transforms/LoopSimplify/basictest.ll
@@ -1,17 +1,236 @@
-; RUN: opt < %s -loop-simplify
-; RUN: opt < %s -passes=loop-simplify
+; RUN: opt < %s -S -loop-simplify | FileCheck %s
+; RUN: opt < %s -S -passes=loop-simplify | FileCheck %s
-; This function should get a preheader inserted before BB3, that is jumped
-; to by BB1 & BB2
+; This function should get a preheader inserted before bb3, which is jumped
+; to by bb1 & bb2.
+define void @test() {
+; CHECK-LABEL: define void @test(
+entry:
+ br i1 true, label %bb1, label %bb2
+
+bb1:
+ br label %bb3
+; CHECK: bb1:
+; CHECK-NEXT: br label %[[PH:.*]]
+
+bb2:
+ br label %bb3
+; CHECK: bb2:
+; CHECK-NEXT: br label %[[PH]]
+
+bb3:
+ br label %bb3
+; CHECK: [[PH]]:
+; CHECK-NEXT: br label %bb3
+;
+; CHECK: bb3:
+; CHECK-NEXT: br label %bb3
+}
+
+; Test a case where we have multiple exit blocks as successors of a single loop
+; block that need to be made dedicated exit blocks. We also have multiple
+; exiting edges to one of the exit blocks that all should be rewritten.
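+; (An exit block is "dedicated" when all of its predecessors lie inside the
+; loop; to get there, LoopSimplify splits each exiting edge whose target also
+; has predecessors outside the loop.)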
+define void @test_multiple_exits_from_single_block(i8 %a, i8* %b.ptr) {
+; CHECK-LABEL: define void @test_multiple_exits_from_single_block(
+entry:
+ switch i8 %a, label %loop [
+ i8 0, label %exit.a
+ i8 1, label %exit.b
+ ]
+; CHECK: entry:
+; CHECK-NEXT: switch i8 %a, label %[[PH:.*]] [
+; CHECK-NEXT: i8 0, label %exit.a
+; CHECK-NEXT: i8 1, label %exit.b
+; CHECK-NEXT: ]
+
+loop:
+ %b = load volatile i8, i8* %b.ptr
+ switch i8 %b, label %loop [
+ i8 0, label %exit.a
+ i8 1, label %exit.b
+ i8 2, label %loop
+ i8 3, label %exit.a
+ i8 4, label %loop
+ i8 5, label %exit.a
+ i8 6, label %loop
+ ]
+; CHECK: [[PH]]:
+; CHECK-NEXT: br label %loop
;
+; CHECK: loop:
+; CHECK-NEXT: %[[B:.*]] = load volatile i8, i8* %b.ptr
+; CHECK-NEXT: switch i8 %[[B]], label %[[BACKEDGE:.*]] [
+; CHECK-NEXT: i8 0, label %[[LOOPEXIT_A:.*]]
+; CHECK-NEXT: i8 1, label %[[LOOPEXIT_B:.*]]
+; CHECK-NEXT: i8 2, label %[[BACKEDGE]]
+; CHECK-NEXT: i8 3, label %[[LOOPEXIT_A]]
+; CHECK-NEXT: i8 4, label %[[BACKEDGE]]
+; CHECK-NEXT: i8 5, label %[[LOOPEXIT_A]]
+; CHECK-NEXT: i8 6, label %[[BACKEDGE]]
+; CHECK-NEXT: ]
+;
+; CHECK: [[BACKEDGE]]:
+; CHECK-NEXT: br label %loop
-define void @test() {
- br i1 true, label %BB1, label %BB2
-BB1: ; preds = %0
- br label %BB3
-BB2: ; preds = %0
- br label %BB3
-BB3: ; preds = %BB3, %BB2, %BB1
- br label %BB3
+exit.a:
+ ret void
+; CHECK: [[LOOPEXIT_A]]:
+; CHECK-NEXT: br label %exit.a
+;
+; CHECK: exit.a:
+; CHECK-NEXT: ret void
+
+exit.b:
+ ret void
+; CHECK: [[LOOPEXIT_B]]:
+; CHECK-NEXT: br label %exit.b
+;
+; CHECK: exit.b:
+; CHECK-NEXT: ret void
+}
+
+; Check that we leave already dedicated exits alone when forming dedicated exit
+; blocks.
+define void @test_pre_existing_dedicated_exits(i1 %a, i1* %ptr) {
+; CHECK-LABEL: define void @test_pre_existing_dedicated_exits(
+entry:
+ br i1 %a, label %loop.ph, label %non_dedicated_exit
+; CHECK: entry:
+; CHECK-NEXT: br i1 %a, label %loop.ph, label %non_dedicated_exit
+
+loop.ph:
+ br label %loop.header
+; CHECK: loop.ph:
+; CHECK-NEXT: br label %loop.header
+
+loop.header:
+ %c1 = load volatile i1, i1* %ptr
+ br i1 %c1, label %loop.body1, label %dedicated_exit1
+; CHECK: loop.header:
+; CHECK-NEXT: %[[C1:.*]] = load volatile i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[C1]], label %loop.body1, label %dedicated_exit1
+
+loop.body1:
+ %c2 = load volatile i1, i1* %ptr
+ br i1 %c2, label %loop.body2, label %non_dedicated_exit
+; CHECK: loop.body1:
+; CHECK-NEXT: %[[C2:.*]] = load volatile i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[C2]], label %loop.body2, label %[[LOOPEXIT:.*]]
+
+loop.body2:
+ %c3 = load volatile i1, i1* %ptr
+ br i1 %c3, label %loop.backedge, label %dedicated_exit2
+; CHECK: loop.body2:
+; CHECK-NEXT: %[[C3:.*]] = load volatile i1, i1* %ptr
+; CHECK-NEXT: br i1 %[[C3]], label %loop.backedge, label %dedicated_exit2
+
+loop.backedge:
+ br label %loop.header
+; CHECK: loop.backedge:
+; CHECK-NEXT: br label %loop.header
+
+dedicated_exit1:
+ ret void
+; Check that there isn't a split loop exit.
+; CHECK-NOT: br label %dedicated_exit1
+;
+; CHECK: dedicated_exit1:
+; CHECK-NEXT: ret void
+
+dedicated_exit2:
+ ret void
+; Check that there isn't a split loop exit.
+; CHECK-NOT: br label %dedicated_exit2
+;
+; CHECK: dedicated_exit2:
+; CHECK-NEXT: ret void
+
+non_dedicated_exit:
+ ret void
+; CHECK: [[LOOPEXIT]]:
+; CHECK-NEXT: br label %non_dedicated_exit
+;
+; CHECK: non_dedicated_exit:
+; CHECK-NEXT: ret void
}
+; Check that we form what dedicated exits we can, even when some exits are
+; reached via indirectbr, which precludes forming dedicated exits.
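+; (Edges from an indirectbr cannot be split, so the exits it reaches must be
+; left non-dedicated; only the switch edge from loop.body1 to exit.b can be
+; rewritten, as the checks show.)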
+define void @test_form_some_dedicated_exits_despite_indirectbr(i8 %a, i8* %ptr, i8** %addr.ptr) {
+; CHECK-LABEL: define void @test_form_some_dedicated_exits_despite_indirectbr(
+entry:
+ switch i8 %a, label %loop.ph [
+ i8 0, label %exit.a
+ i8 1, label %exit.b
+ i8 2, label %exit.c
+ ]
+; CHECK: entry:
+; CHECK-NEXT: switch i8 %a, label %loop.ph [
+; CHECK-NEXT: i8 0, label %exit.a
+; CHECK-NEXT: i8 1, label %exit.b
+; CHECK-NEXT: i8 2, label %exit.c
+; CHECK-NEXT: ]
+
+loop.ph:
+ br label %loop.header
+; CHECK: loop.ph:
+; CHECK-NEXT: br label %loop.header
+
+loop.header:
+ %addr1 = load volatile i8*, i8** %addr.ptr
+ indirectbr i8* %addr1, [label %loop.body1, label %exit.a]
+; CHECK: loop.header:
+; CHECK-NEXT: %[[ADDR1:.*]] = load volatile i8*, i8** %addr.ptr
+; CHECK-NEXT: indirectbr i8* %[[ADDR1]], [label %loop.body1, label %exit.a]
+
+loop.body1:
+ %b = load volatile i8, i8* %ptr
+ switch i8 %b, label %loop.body2 [
+ i8 0, label %exit.a
+ i8 1, label %exit.b
+ i8 2, label %exit.c
+ ]
+; CHECK: loop.body1:
+; CHECK-NEXT: %[[B:.*]] = load volatile i8, i8* %ptr
+; CHECK-NEXT: switch i8 %[[B]], label %loop.body2 [
+; CHECK-NEXT: i8 0, label %exit.a
+; CHECK-NEXT: i8 1, label %[[LOOPEXIT:.*]]
+; CHECK-NEXT: i8 2, label %exit.c
+; CHECK-NEXT: ]
+
+loop.body2:
+ %addr2 = load volatile i8*, i8** %addr.ptr
+ indirectbr i8* %addr2, [label %loop.backedge, label %exit.c]
+; CHECK: loop.body2:
+; CHECK-NEXT: %[[ADDR2:.*]] = load volatile i8*, i8** %addr.ptr
+; CHECK-NEXT: indirectbr i8* %[[ADDR2]], [label %loop.backedge, label %exit.c]
+
+loop.backedge:
+ br label %loop.header
+; CHECK: loop.backedge:
+; CHECK-NEXT: br label %loop.header
+
+exit.a:
+ ret void
+; Check that there isn't a split loop exit.
+; CHECK-NOT: br label %exit.a
+;
+; CHECK: exit.a:
+; CHECK-NEXT: ret void
+
+exit.b:
+ ret void
+; CHECK: [[LOOPEXIT]]:
+; CHECK-NEXT: br label %exit.b
+;
+; CHECK: exit.b:
+; CHECK-NEXT: ret void
+
+exit.c:
+ ret void
+; Check that there isn't a split loop exit.
+; CHECK-NOT: br label %exit.c
+;
+; CHECK: exit.c:
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/bin_power.ll b/test/Transforms/LoopStrengthReduce/X86/bin_power.ll
new file mode 100644
index 0000000000000..35cb28da7d55d
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/bin_power.ll
@@ -0,0 +1,264 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Show that b^2 is expanded correctly.
+define i32 @test_01(i32 %a) {
+; CHECK-LABEL: @test_01
+; CHECK: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
+; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
+; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
+; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
+; CHECK: exit:
+; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1
+; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
+; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B2]], -1
+; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]]
+; CHECK-NEXT: ret i32 [[R2]]
+
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+ %b = add i32 %a, 1
+ %b.pow.2 = mul i32 %b, %b
+ %result = add i32 %b.pow.2, %indvars.iv
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next, 80
+ br i1 %exitcond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret i32 %result
+}
+
+; Show that b^8 is expanded correctly.
+define i32 @test_02(i32 %a) {
+; CHECK-LABEL: @test_02
+; CHECK: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
+; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
+; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
+; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
+; CHECK: exit:
+; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1
+; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
+; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]]
+; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]]
+; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B8]], -1
+; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]]
+; CHECK-NEXT: ret i32 [[R2]]
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+ %b = add i32 %a, 1
+ %b.pow.2 = mul i32 %b, %b
+ %b.pow.4 = mul i32 %b.pow.2, %b.pow.2
+ %b.pow.8 = mul i32 %b.pow.4, %b.pow.4
+ %result = add i32 %b.pow.8, %indvars.iv
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next, 80
+ br i1 %exitcond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret i32 %result
+}
+
+; Show that b^27 (27 = 1 + 2 + 8 + 16) is expanded correctly.
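+; The expected expansion squares repeatedly (b^2, b^4, b^8, b^16) and then
+; combines the needed factors: b^3 = b * b^2, b^11 = b^3 * b^8, and finally
+; b^27 = b^11 * b^16, as the CHECK lines below spell out.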
+define i32 @test_03(i32 %a) {
+; CHECK-LABEL: @test_03
+; CHECK: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
+; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
+; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
+; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
+; CHECK: exit:
+; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1
+; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
+; CHECK-NEXT: [[B3:[^ ]+]] = mul i32 [[B]], [[B2]]
+; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]]
+; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]]
+; CHECK-NEXT: [[B11:[^ ]+]] = mul i32 [[B3]], [[B8]]
+; CHECK-NEXT: [[B16:[^ ]+]] = mul i32 [[B8]], [[B8]]
+; CHECK-NEXT: [[B27:[^ ]+]] = mul i32 [[B11]], [[B16]]
+; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B27]], -1
+; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]]
+; CHECK-NEXT: ret i32 [[R2]]
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+ %b = add i32 %a, 1
+ %b.pow.2 = mul i32 %b, %b
+ %b.pow.4 = mul i32 %b.pow.2, %b.pow.2
+ %b.pow.8 = mul i32 %b.pow.4, %b.pow.4
+ %b.pow.16 = mul i32 %b.pow.8, %b.pow.8
+ %b.pow.24 = mul i32 %b.pow.16, %b.pow.8
+ %b.pow.25 = mul i32 %b.pow.24, %b
+ %b.pow.26 = mul i32 %b.pow.25, %b
+ %b.pow.27 = mul i32 %b.pow.26, %b
+ %result = add i32 %b.pow.27, %indvars.iv
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next, 80
+ br i1 %exitcond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret i32 %result
+}
+
+; Show how a linear calculation of b^16 is turned into a logarithmic one.
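+; The loop body builds b^16 with 15 sequential multiplications; the expander
+; is expected to rebuild it with just four squarings, as checked below.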
+define i32 @test_04(i32 %a) {
+; CHECK-LABEL: @test_04
+; CHECK: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
+; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
+; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
+; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
+; CHECK: exit:
+; CHECK-NEXT: [[B:[^ ]+]] = add i32 %a, 1
+; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
+; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]]
+; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]]
+; CHECK-NEXT: [[B16:[^ ]+]] = mul i32 [[B8]], [[B8]]
+; CHECK-NEXT: [[R1:[^ ]+]] = add i32 [[B16]], -1
+; CHECK-NEXT: [[R2:[^ ]+]] = sub i32 [[R1]], [[IV_INC]]
+; CHECK-NEXT: ret i32 [[R2]]
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+ %b = add i32 %a, 1
+ %b.pow.2 = mul i32 %b, %b
+ %b.pow.3 = mul i32 %b.pow.2, %b
+ %b.pow.4 = mul i32 %b.pow.3, %b
+ %b.pow.5 = mul i32 %b.pow.4, %b
+ %b.pow.6 = mul i32 %b.pow.5, %b
+ %b.pow.7 = mul i32 %b.pow.6, %b
+ %b.pow.8 = mul i32 %b.pow.7, %b
+ %b.pow.9 = mul i32 %b.pow.8, %b
+ %b.pow.10 = mul i32 %b.pow.9, %b
+ %b.pow.11 = mul i32 %b.pow.10, %b
+ %b.pow.12 = mul i32 %b.pow.11, %b
+ %b.pow.13 = mul i32 %b.pow.12, %b
+ %b.pow.14 = mul i32 %b.pow.13, %b
+ %b.pow.15 = mul i32 %b.pow.14, %b
+ %b.pow.16 = mul i32 %b.pow.15, %b
+ %result = add i32 %b.pow.16, %indvars.iv
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next, 80
+ br i1 %exitcond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret i32 %result
+}
+
+; The output here is reasonably big; we just check that the number of expanded
+; instructions is sane.
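+; The chain below squares 17 times, computing b^(2^17). The checks only bound
+; the size of the expanded code: some value %100 must appear in the exit
+; block, but no value %150.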
+define i32 @test_05(i32 %a) {
+; CHECK-LABEL: @test_05
+; CHECK: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
+; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
+; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
+; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
+; CHECK: exit:
+; CHECK: %100
+; CHECK-NOT: %150
+
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+ %tmp3 = add i32 %a, 1
+ %tmp4 = mul i32 %tmp3, %tmp3
+ %tmp5 = mul i32 %tmp4, %tmp4
+ %tmp6 = mul i32 %tmp5, %tmp5
+ %tmp7 = mul i32 %tmp6, %tmp6
+ %tmp8 = mul i32 %tmp7, %tmp7
+ %tmp9 = mul i32 %tmp8, %tmp8
+ %tmp10 = mul i32 %tmp9, %tmp9
+ %tmp11 = mul i32 %tmp10, %tmp10
+ %tmp12 = mul i32 %tmp11, %tmp11
+ %tmp13 = mul i32 %tmp12, %tmp12
+ %tmp14 = mul i32 %tmp13, %tmp13
+ %tmp15 = mul i32 %tmp14, %tmp14
+ %tmp16 = mul i32 %tmp15, %tmp15
+ %tmp17 = mul i32 %tmp16, %tmp16
+ %tmp18 = mul i32 %tmp17, %tmp17
+ %tmp19 = mul i32 %tmp18, %tmp18
+ %tmp20 = mul i32 %tmp19, %tmp19
+ %tmp22 = add i32 %tmp20, %indvars.iv
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next, 80
+ br i1 %exitcond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret i32 %tmp22
+}
+
+; Show that the transformation works even if the calculation involves other
+; values mixed into the chain.
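+; Here the CHECK lines only require that the pure power-of-b part is still
+; built by repeated squaring, even with the multiplications by %c interleaved.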
+define i32 @test_06(i32 %a, i32 %c) {
+; CHECK-LABEL: @test_06
+; CHECK: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: [[IV:[^ ]+]] = phi i32 [ [[IV_INC:[^ ]+]], %loop ], [ 0, %entry ]
+; CHECK-NEXT: [[IV_INC]] = add nsw i32 [[IV]], -1
+; CHECK-NEXT: [[EXITCOND:[^ ]+]] = icmp eq i32 [[IV_INC]], -80
+; CHECK-NEXT: br i1 [[EXITCOND]], label %exit, label %loop
+; CHECK: exit:
+; CHECK: [[B:[^ ]+]] = add i32 %a, 1
+; CHECK-NEXT: [[B2:[^ ]+]] = mul i32 [[B]], [[B]]
+; CHECK-NEXT: [[B4:[^ ]+]] = mul i32 [[B2]], [[B2]]
+; CHECK-NEXT: [[B8:[^ ]+]] = mul i32 [[B4]], [[B4]]
+; CHECK-NEXT: [[B16:[^ ]+]] = mul i32 [[B8]], [[B8]]
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+ %b = add i32 %a, 1
+ %b.pow.2.tmp = mul i32 %b, %b
+ %b.pow.2 = mul i32 %b.pow.2.tmp, %c
+ %b.pow.3 = mul i32 %b.pow.2, %b
+ %b.pow.4 = mul i32 %b.pow.3, %b
+ %b.pow.5 = mul i32 %b.pow.4, %b
+ %b.pow.6.tmp = mul i32 %b.pow.5, %b
+ %b.pow.6 = mul i32 %b.pow.6.tmp, %c
+ %b.pow.7 = mul i32 %b.pow.6, %b
+ %b.pow.8 = mul i32 %b.pow.7, %b
+ %b.pow.9 = mul i32 %b.pow.8, %b
+ %b.pow.10 = mul i32 %b.pow.9, %b
+ %b.pow.11 = mul i32 %b.pow.10, %b
+ %b.pow.12.tmp = mul i32 %b.pow.11, %b
+ %b.pow.12 = mul i32 %c, %b.pow.12.tmp
+ %b.pow.13 = mul i32 %b.pow.12, %b
+ %b.pow.14 = mul i32 %b.pow.13, %b
+ %b.pow.15 = mul i32 %b.pow.14, %b
+ %b.pow.16 = mul i32 %b.pow.15, %b
+ %result = add i32 %b.pow.16, %indvars.iv
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next, 80
+ br i1 %exitcond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret i32 %result
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/canonical.ll b/test/Transforms/LoopStrengthReduce/X86/canonical.ll
index 6b6acb8687454..2dafbb408aad4 100644
--- a/test/Transforms/LoopStrengthReduce/X86/canonical.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/canonical.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -lsr-insns-cost=false -S < %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -S < %s | FileCheck %s
; Check that LSR formula canonicalization puts loop-invariant regs before the
; induction variable of the current loop, so exprs involving loop-invariant
; regs can be promoted outside of the current loop.
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
index 7c01432914fff..fb63b66137f37 100644
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -163,7 +163,7 @@ for.end: ; preds = %for.body, %entry
; X64: movzbl -3(
;
; X32: foldedidx:
-; X32: movzbl 400(
+; X32: movzbl -3(
define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
entry:
br label %for.body
@@ -275,7 +275,7 @@ exit:
;
; X32: @testCmpZero
; X32: %for.body82.us
-; X32: cmp
+; X32: dec
; X32: jne
define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
index deca954fea789..c5ce3bc2b6bef 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll
@@ -22,16 +22,16 @@ target triple = "x86_64-apple-macosx"
; CHECK-LABEL: @test2
; CHECK-LABEL: test2.loop:
; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
-; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ 1, %entry ]
-; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, -1
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ]
+; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 1
; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
;
; CHECK-LABEL: for.end:
-; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next, 0
+; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next2, 0
; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0
-; CHECK: %0 = sub i32 0, %sub.us
-; CHECK: %1 = sub i32 %0, %lsr.iv.next
-; CHECK: %sext.us = mul i32 %lsr.iv.next2, %1
+; CHECK: %1 = sub i32 0, %sub.us
+; CHECK: %2 = add i32 %1, %lsr.iv.next
+; CHECK: %sext.us = mul i32 %lsr.iv.next2, %2
; CHECK: %f = ashr i32 %sext.us, 24
; CHECK: ret i32 %f
define i32 @test2() {
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
index 7f163500a737f..4888536bdf819 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
; OPT test checks that LSR optimizes the compare for a static counter into a compare with 0.
diff --git a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll
index 239cc02335067..3273cb4e6b5bc 100644
--- a/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/lsr-insns-2.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
; OPT checks that LSR prefers fewer instructions over fewer registers.
diff --git a/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll b/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
index e05d5aa3027be..b563eb3ad9940 100644
--- a/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-reduce -S < %s | FileCheck %s
; Check that when we use an outer-loop induction variable inside an inner-loop
; induction value expr, LSR can still choose to use a single induction variable
@@ -23,21 +22,18 @@ for.body: ; preds = %for.inc, %entry
for.body2.preheader: ; preds = %for.body
br label %for.body2
-; Check LSR only generates two induction variables for for.body2 one for compare and
-; one to shared by multiple array accesses.
+; Check that LSR generates only one induction variable for for.body2, and that
+; the induction variable is shared by multiple array accesses.
; CHECK: for.body2:
-; CHECK-NEXT: [[LSRAR:%[^,]+]] = phi i8* [ %scevgep, %for.body2 ], [ %maxarray, %for.body2.preheader ]
-; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ %0, %for.body2.preheader ]
+; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ 0, %for.body2.preheader ]
; CHECK-NOT: = phi i64 [ {{.*}}, %for.body2 ], [ {{.*}}, %for.body2.preheader ]
-; CHECK: [[LSRINT:%[^,]+]] = ptrtoint i8* [[LSRAR]] to i64
-; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* [[LSRAR]], i64 1
-; CHECK: {{.*}} = load i8, i8* [[SCEVGEP1]], align 1
-; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* %1, i64 [[LSRINT]]
+; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* %maxarray, i64 [[LSR]]
+; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* [[SCEVGEP1]], i64 1
; CHECK: {{.*}} = load i8, i8* [[SCEVGEP2]], align 1
-; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSRINT]]
-; CHECK: store i8 {{.*}}, i8* [[SCEVGEP3]], align 1
-; CHECK: [[LSRNEXT:%[^,]+]] = add i64 [[LSR]], -1
-; CHECK: %exitcond = icmp ne i64 [[LSRNEXT]], 0
+; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
+; CHECK: {{.*}} = load i8, i8* [[SCEVGEP3]], align 1
+; CHECK: [[SCEVGEP4:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
+; CHECK: store i8 {{.*}}, i8* [[SCEVGEP4]], align 1
; CHECK: br i1 %exitcond, label %for.body2, label %for.inc.loopexit
for.body2: ; preds = %for.body2.preheader, %for.body2
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
index a35596aff11c6..d115787b6eafa 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -2,7 +2,6 @@
; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info < %s | FileCheck %s
-; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
; STATS: 2 loop-unswitch - Number of switches unswitched
; CHECK: %1 = icmp eq i32 %c, 1
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
index 393dd5c313a4c..c4e8d6f889804 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -2,7 +2,6 @@
; RUN: opt -loop-unswitch -loop-unswitch-threshold 13 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info < %s | FileCheck %s
-; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
; STATS: 1 loop-unswitch - Number of switches unswitched
; ModuleID = '../llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll'
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
index 20f03c987eb7e..18e544d86cafc 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -2,7 +2,6 @@
; RUN: opt -loop-unswitch -loop-unswitch-threshold 1000 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info < %s | FileCheck %s
-; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
; STATS: 3 loop-unswitch - Number of switches unswitched
; CHECK: %1 = icmp eq i32 %c, 1
diff --git a/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll b/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
new file mode 100644
index 0000000000000..832843983ebf7
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AMDGPU/packed-math.ll
@@ -0,0 +1,34 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s -loop-vectorize -dce -instcombine -S | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s -loop-vectorize -dce -instcombine -S | FileCheck -check-prefix=CIVI -check-prefix=GCN %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s -loop-vectorize -dce -instcombine -S | FileCheck -check-prefix=CIVI -check-prefix=GCN %s
+
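+; gfx900 supports packed f16 arithmetic, so the fadd reduction below is
+; expected to be vectorized to <2 x half> with a final horizontal reduction in
+; middle.block; fiji and hawaii (the CIVI runs) are not expected to use packed
+; operations.
+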
+; GCN-LABEL: @vectorize_v2f16_loop(
+; GFX9: vector.body:
+; GFX9: phi <2 x half>
+; GFX9: load <2 x half>
+; GFX9: fadd fast <2 x half>
+
+; GFX9: middle.block:
+; GFX9: fadd fast <2 x half>
+
+; VI: phi half
+; VI: load half
+; VI: fadd fast half
+define half @vectorize_v2f16_loop(half addrspace(1)* noalias %s) {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %q.04 = phi half [ 0.0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds half, half addrspace(1)* %s, i64 %indvars.iv
+ %0 = load half, half addrspace(1)* %arrayidx, align 2
+ %add = fadd fast half %q.04, %0
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ %add.lcssa = phi half [ %add, %for.body ]
+ ret half %add.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll
index 47c262b11b463..89d69e232f5b5 100644
--- a/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -115,32 +115,6 @@ define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
ret void
}
-; N is unknown, we need a tail. Can't vectorize because the loop is cold.
-;CHECK-LABEL: @example4(
-;CHECK-NOT: <4 x i32>
-;CHECK: ret void
-define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) {
- %1 = icmp eq i32 %n, 0
- br i1 %1, label %._crit_edge, label %.lr.ph, !prof !0
-
-.lr.ph: ; preds = %0, %.lr.ph
- %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ]
- %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
- %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
- %2 = add nsw i32 %.05, -1
- %3 = getelementptr inbounds i32, i32* %.023, i64 1
- %4 = load i32, i32* %.023, align 16
- %5 = getelementptr inbounds i32, i32* %.014, i64 1
- store i32 %4, i32* %.014, align 16
- %6 = icmp eq i32 %2, 0
- br i1 %6, label %._crit_edge, label %.lr.ph
-
-._crit_edge: ; preds = %.lr.ph, %0
- ret void
-}
-
-!0 = !{!"branch_weights", i32 64, i32 4}
-
; We can't vectorize this one because we need a runtime ptr check.
;CHECK-LABEL: @example23(
;CHECK-NOT: <4 x i32>
diff --git a/test/Transforms/LoopVectorize/tripcount.ll b/test/Transforms/LoopVectorize/tripcount.ll
new file mode 100644
index 0000000000000..03b3aa171d4ff
--- /dev/null
+++ b/test/Transforms/LoopVectorize/tripcount.ll
@@ -0,0 +1,91 @@
+; This test verifies that the loop vectorizer will not vectorize low trip count
+; loops that require runtime checks (the trip count is computed from profile info).
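+; With !1 = branch_weights 100, 0 the exit edge is taken 100 times against 0
+; backedges, i.e. roughly one iteration per invocation; with !3 =
+; branch_weights 10, 10000 the loop runs about 1000 iterations each time it is
+; entered, even though !2 says it is entered on only 10 of the 100 invocations
+; recorded by the function_entry_count.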
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -loop-vectorize-with-block-frequency -S | FileCheck %s
+
+target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
+
+@tab = common global [32 x i8] zeroinitializer, align 1
+
+define i32 @foo_low_trip_count1(i32 %bound) {
+; Simple loop with low tripcount. Should not be vectorized.
+
+; CHECK-LABEL: @foo_low_trip_count1(
+; CHECK-NOT: <{{[0-9]+}} x i8>
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+ %0 = load i8, i8* %arrayidx, align 1
+ %cmp1 = icmp eq i8 %0, 0
+ %. = select i1 %cmp1, i8 2, i8 1
+ store i8 %., i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %i.08, %bound
+ br i1 %exitcond, label %for.end, label %for.body, !prof !1
+
+for.end: ; preds = %for.body
+ ret i32 0
+}
+
+define i32 @foo_low_trip_count2(i32 %bound) !prof !0 {
+; The loop has the same invocation count as the function, but a low trip count
+; per invocation, so it is not worth vectorizing.
+
+; CHECK-LABEL: @foo_low_trip_count2(
+; CHECK-NOT: <{{[0-9]+}} x i8>
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+ %0 = load i8, i8* %arrayidx, align 1
+ %cmp1 = icmp eq i8 %0, 0
+ %. = select i1 %cmp1, i8 2, i8 1
+ store i8 %., i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %i.08, %bound
+ br i1 %exitcond, label %for.end, label %for.body, !prof !1
+
+for.end: ; preds = %for.body
+ ret i32 0
+}
+
+define i32 @foo_low_trip_count3(i1 %cond, i32 %bound) !prof !0 {
+; The loop has a low invocation count compared to the function invocation
+; count, but a high trip count per invocation. Vectorize it.
+
+; CHECK-LABEL: @foo_low_trip_count3(
+; CHECK: vector.body:
+
+entry:
+ br i1 %cond, label %for.preheader, label %for.end, !prof !2
+
+for.preheader:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.08 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @tab, i32 0, i32 %i.08
+ %0 = load i8, i8* %arrayidx, align 1
+ %cmp1 = icmp eq i8 %0, 0
+ %. = select i1 %cmp1, i8 2, i8 1
+ store i8 %., i8* %arrayidx, align 1
+ %inc = add nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %i.08, %bound
+ br i1 %exitcond, label %for.end, label %for.body, !prof !3
+
+for.end: ; preds = %for.body
+ ret i32 0
+}
+
+
+!0 = !{!"function_entry_count", i64 100}
+!1 = !{!"branch_weights", i32 100, i32 0}
+!2 = !{!"branch_weights", i32 10, i32 90}
+!3 = !{!"branch_weights", i32 10, i32 10000}
diff --git a/test/Transforms/LowerTypeTests/export-icall.ll b/test/Transforms/LowerTypeTests/export-icall.ll
index ad36048993067..f53b63af496ca 100644
--- a/test/Transforms/LowerTypeTests/export-icall.ll
+++ b/test/Transforms/LowerTypeTests/export-icall.ll
@@ -60,11 +60,6 @@ declare !type !8 void @f(i32 %x)
; SUMMARY-NEXT: SizeM1BitWidth: 0
; SUMMARY-NEXT: WPDRes:
-; SUMMARY: CfiFunctionDefs:
-; SUMMARY-NEXT: - f
-; SUMMARY-NEXT: - g
-; SUMMARY-NEXT: - h
-; SUMMARY-NEXT: CfiFunctionDecls:
-; SUMMARY-NEXT: - external
-; SUMMARY-NEXT: - external_weak
+; SUMMARY: CfiFunctionDefs: [ f, g, h ]
+; SUMMARY-NEXT: CfiFunctionDecls: [ external, external_weak ]
; SUMMARY-NEXT: ...
diff --git a/test/Transforms/NewGVN/pr33461.ll b/test/Transforms/NewGVN/pr33461.ll
new file mode 100644
index 0000000000000..0a41d6834a4a7
--- /dev/null
+++ b/test/Transforms/NewGVN/pr33461.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;; Ensure the store verifier is not overzealous
+; RUN: opt -newgvn -S %s | FileCheck %s
+@b = external global i16, align 2
+
+define void @patatino() {
+; CHECK-LABEL: @patatino(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[FOR_COND1:%.*]], label [[FOR_INC:%.*]]
+; CHECK: for.cond1:
+; CHECK-NEXT: [[TMP0:%.*]] = phi i16 [ [[INC:%.*]], [[FOR_INC]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT: store i16 [[TMP0]], i16* @b, align 2
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* @b, align 2
+; CHECK-NEXT: [[INC]] = add i16 [[TMP1]], 1
+; CHECK-NEXT: store i16 [[INC]], i16* @b, align 2
+; CHECK-NEXT: br label [[FOR_COND1]]
+;
+entry:
+ br i1 false, label %for.cond1, label %for.inc
+
+for.cond1:
+ %e.0 = phi i16* [ %e.1, %for.inc ], [ null, %entry ]
+ %0 = load i16, i16* %e.0, align 2
+ %add = add i16 %0, 0
+ store i16 %add, i16* %e.0, align 2
+ br label %for.inc
+
+for.inc:
+ %e.1 = phi i16* [ %e.0, %for.cond1 ], [ @b, %entry ]
+ %1 = load i16, i16* @b, align 2
+ %inc = add i16 %1, 1
+ store i16 %inc, i16* @b, align 2
+ br label %for.cond1
+}
diff --git a/test/Transforms/PGOProfile/counter_promo.ll b/test/Transforms/PGOProfile/counter_promo.ll
new file mode 100644
index 0000000000000..125e7d77cdf43
--- /dev/null
+++ b/test/Transforms/PGOProfile/counter_promo.ll
@@ -0,0 +1,68 @@
+; RUN: opt < %s -pgo-instr-gen -instrprof -do-counter-promotion=true -S | FileCheck --check-prefix=PROMO --check-prefix=NONATOMIC_PROMO %s
+; RUN: opt < %s --passes=pgo-instr-gen,instrprof -do-counter-promotion=true -S | FileCheck --check-prefix=PROMO --check-prefix=NONATOMIC_PROMO %s
+; RUN: opt < %s -pgo-instr-gen -instrprof -do-counter-promotion=true -atomic-counter-update-promoted -S | FileCheck --check-prefix=PROMO --check-prefix=ATOMIC_PROMO %s
+; RUN: opt < %s --passes=pgo-instr-gen,instrprof -do-counter-promotion=true -atomic-counter-update-promoted -S | FileCheck --check-prefix=PROMO --check-prefix=ATOMIC_PROMO %s
+
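+; Counter promotion keeps the loop's profile counters in registers (the phis
+; checked below) and only adds the accumulated values back into the
+; @__profc_foo slots at the loop exit; with -atomic-counter-update-promoted
+; the writeback uses atomicrmw add instead of a load/add/store sequence.
+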
+define void @foo(i32 %n, i32 %N) {
+; PROMO-LABEL: @foo
+bb:
+ %tmp = add nsw i32 %n, 1
+ %tmp1 = add nsw i32 %n, -1
+ br label %bb2
+
+bb2: ; preds = %bb9, %bb
+; PROMO: phi {{.*}}
+; PROMO-NEXT: phi {{.*}}
+; PROMO-NEXT: phi {{.*}}
+; PROMO-NEXT: phi {{.*}}
+ %i.0 = phi i32 [ 0, %bb ], [ %tmp10, %bb9 ]
+ %tmp3 = icmp slt i32 %i.0, %tmp
+ br i1 %tmp3, label %bb4, label %bb5
+
+bb4: ; preds = %bb2
+ tail call void @bar(i32 1)
+ br label %bb9
+
+bb5: ; preds = %bb2
+ %tmp6 = icmp slt i32 %i.0, %tmp1
+ br i1 %tmp6, label %bb7, label %bb8
+
+bb7: ; preds = %bb5
+ tail call void @bar(i32 2)
+ br label %bb9
+
+bb8: ; preds = %bb5
+ tail call void @bar(i32 3)
+ br label %bb9
+
+bb9: ; preds = %bb8, %bb7, %bb4
+; PROMO: %[[LIVEOUT3:[a-z0-9]+]] = phi {{.*}}
+; PROMO-NEXT: %[[LIVEOUT2:[a-z0-9]+]] = phi {{.*}}
+; PROMO-NEXT: %[[LIVEOUT1:[a-z0-9]+]] = phi {{.*}}
+ %tmp10 = add nsw i32 %i.0, 1
+ %tmp11 = icmp slt i32 %tmp10, %N
+ br i1 %tmp11, label %bb2, label %bb12
+
+bb12: ; preds = %bb9
+ ret void
+; NONATOMIC_PROMO: %[[PROMO1:[a-z0-9.]+]] = load {{.*}} @__profc_foo{{.*}} 0)
+; NONATOMIC_PROMO-NEXT: add {{.*}} %[[PROMO1]], %[[LIVEOUT1]]
+; NONATOMIC_PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}0)
+; NONATOMIC_PROMO-NEXT: %[[PROMO2:[a-z0-9.]+]] = load {{.*}} @__profc_foo{{.*}} 1)
+; NONATOMIC_PROMO-NEXT: add {{.*}} %[[PROMO2]], %[[LIVEOUT2]]
+; NONATOMIC_PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}1)
+; NONATOMIC_PROMO-NEXT: %[[PROMO3:[a-z0-9.]+]] = load {{.*}} @__profc_foo{{.*}} 2)
+; NONATOMIC_PROMO-NEXT: add {{.*}} %[[PROMO3]], %[[LIVEOUT3]]
+; NONATOMIC_PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}2)
+; ATOMIC_PROMO: atomicrmw add {{.*}} @__profc_foo{{.*}}0), i64 %[[LIVEOUT1]] seq_cst
+; ATOMIC_PROMO-NEXT: atomicrmw add {{.*}} @__profc_foo{{.*}}1), i64 %[[LIVEOUT2]] seq_cst
+; ATOMIC_PROMO-NEXT: atomicrmw add {{.*}} @__profc_foo{{.*}}2), i64 %[[LIVEOUT3]] seq_cst
+; PROMO: {{.*}} = load {{.*}} @__profc_foo{{.*}} 3)
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}3)
+; PROMO-NOT: @__profc_foo
+
+
+}
+
+declare void @bar(i32)
diff --git a/test/Transforms/PGOProfile/counter_promo_exit_merge.ll b/test/Transforms/PGOProfile/counter_promo_exit_merge.ll
new file mode 100644
index 0000000000000..f53d37600ce61
--- /dev/null
+++ b/test/Transforms/PGOProfile/counter_promo_exit_merge.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -instrprof -do-counter-promotion=true -speculative-counter-promotion -S | FileCheck --check-prefix=PROMO %s
+; RUN: opt < %s --passes=instrprof -do-counter-promotion=true -speculative-counter-promotion -S | FileCheck --check-prefix=PROMO %s
+
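+; With multiple exiting edges, speculative counter promotion keeps the
+; counters in registers inside the loop and merges the promoted values with
+; phis in the common exit block bb14 before adding them back into
+; @__profc_foo, as the PROMO checks below verify.
+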
+$__llvm_profile_raw_version = comdat any
+
+@g = common local_unnamed_addr global i32 0, align 4
+@__llvm_profile_raw_version = constant i64 72057594037927940, comdat
+@__profn_foo = private constant [3 x i8] c"foo"
+
+define void @foo(i32 %arg) local_unnamed_addr {
+bb:
+ %tmp = add nsw i32 %arg, -1
+ br label %bb1
+
+bb1: ; preds = %bb11, %bb
+ %tmp2 = phi i32 [ 0, %bb ], [ %tmp12, %bb11 ]
+ %tmp3 = icmp sgt i32 %tmp2, %arg
+ br i1 %tmp3, label %bb7, label %bb4
+
+bb4: ; preds = %bb1
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 63969943867, i32 5, i32 1)
+ tail call void @bar(i32 1)
+ %tmp5 = load i32, i32* @g, align 4
+ %tmp6 = icmp sgt i32 %tmp5, 100
+ br i1 %tmp6, label %bb14, label %bb11
+
+bb7: ; preds = %bb1
+ %tmp8 = icmp slt i32 %tmp2, %tmp
+ br i1 %tmp8, label %bb9, label %bb10
+
+bb9: ; preds = %bb7
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 63969943867, i32 5, i32 2)
+ tail call void @bar(i32 2)
+ br label %bb11
+
+bb10: ; preds = %bb7
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 63969943867, i32 5, i32 3)
+ tail call void @bar(i32 3)
+ br label %bb11
+
+bb11: ; preds = %bb10, %bb9, %bb4
+ %tmp12 = add nuw nsw i32 %tmp2, 1
+ %tmp13 = icmp slt i32 %tmp2, 99
+ br i1 %tmp13, label %bb1, label %bb14
+
+bb14: ; preds = %bb4.bb14_crit_edge, %bb11
+ tail call void @bar(i32 0)
+ br label %bb15
+; PROMO-LABEL: bb14:
+; PROMO: %[[MERGE1:[a-z0-9]+]] = phi {{.*}}
+; PROMO-NEXT: %[[MERGE2:[a-z0-9.]+]] = phi {{.*}}
+; PROMO-NEXT: %[[MERGE3:[a-z0-9.]+]] = phi {{.*}}
+; PROMO-NEXT: %[[PROMO3:[a-z0-9.]+]] = load{{.*}}@__profc_foo{{.*}}1)
+; PROMO-NEXT: {{.*}} = add {{.*}}%[[PROMO3]], %[[MERGE3]]
+; PROMO-NEXT: store{{.*}}@__profc_foo{{.*}}1)
+; PROMO-NEXT: %[[PROMO2:[a-z0-9.]+]] = load{{.*}}@__profc_foo{{.*}}2)
+; PROMO-NEXT: {{.*}} = add {{.*}}%[[PROMO2]], %[[MERGE2]]
+; PROMO-NEXT: store{{.*}}@__profc_foo{{.*}}2)
+; PROMO-NEXT: %[[PROMO1:[a-z0-9.]+]] = load{{.*}}@__profc_foo{{.*}}3)
+; PROMO-NEXT: {{.*}} = add {{.*}}%[[PROMO1]], %[[MERGE1]]
+; PROMO-NEXT: store{{.*}}@__profc_foo{{.*}}3)
+
+bb15: ; preds = %bb14
+ call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 63969943867, i32 5, i32 4)
+ tail call void @bar(i32 1)
+ ret void
+}
+
+declare void @bar(i32) local_unnamed_addr
+
+; Function Attrs: nounwind
+declare void @llvm.instrprof.increment(i8*, i64, i32, i32) #0
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/PGOProfile/counter_promo_mexits.ll b/test/Transforms/PGOProfile/counter_promo_mexits.ll
new file mode 100644
index 0000000000000..71e5f066d50f3
--- /dev/null
+++ b/test/Transforms/PGOProfile/counter_promo_mexits.ll
@@ -0,0 +1,80 @@
+; RUN: opt < %s -pgo-instr-gen -instrprof -do-counter-promotion=true -speculative-counter-promotion -S | FileCheck --check-prefix=PROMO %s
+; RUN: opt < %s --passes=pgo-instr-gen,instrprof -do-counter-promotion=true -speculative-counter-promotion -S | FileCheck --check-prefix=PROMO %s
+
+@g = common local_unnamed_addr global i32 0, align 4
+
+define void @foo(i32 %arg) local_unnamed_addr {
+; PROMO-LABEL: @foo
+bb:
+ %tmp = add nsw i32 %arg, -1
+ br label %bb1
+bb1: ; preds = %bb11, %bb
+ %tmp2 = phi i32 [ 0, %bb ], [ %tmp12, %bb11 ]
+ %tmp3 = icmp sgt i32 %tmp2, %arg
+ br i1 %tmp3, label %bb7, label %bb4
+
+bb4: ; preds = %bb1
+ tail call void @bar(i32 1)
+ %tmp5 = load i32, i32* @g, align 4
+ %tmp6 = icmp sgt i32 %tmp5, 100
+ br i1 %tmp6, label %bb15_0, label %bb11
+
+bb7: ; preds = %bb1
+ %tmp8 = icmp slt i32 %tmp2, %tmp
+ br i1 %tmp8, label %bb9, label %bb10
+
+bb9: ; preds = %bb7
+ tail call void @bar(i32 2)
+ br label %bb11
+
+bb10: ; preds = %bb7
+ tail call void @bar(i32 3)
+ br label %bb11
+
+bb11: ; preds = %bb10, %bb9, %bb4
+ %tmp12 = add nuw nsw i32 %tmp2, 1
+ %tmp13 = icmp slt i32 %tmp2, 99
+ br i1 %tmp13, label %bb1, label %bb14
+
+bb14: ; preds = %bb11
+; PROMO-LABEL: bb14:
+ tail call void @bar(i32 0)
+ br label %bb15
+; PROMO: %pgocount.promoted{{.*}} = load {{.*}} @__profc_foo{{.*}} 0)
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}0)
+; PROMO-NEXT: %pgocount.promoted{{.*}} = load {{.*}} @__profc_foo{{.*}} 1)
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}1)
+; PROMO-NEXT: %pgocount.promoted{{.*}} = load {{.*}} @__profc_foo{{.*}} 2)
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}2)
+; PROMO-NEXT: %pgocount{{.*}} = load {{.*}} @__profc_foo{{.*}} 3)
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}3)
+
+
+bb15_0: ; preds = %bb11
+; PROMO-LABEL: bb15_0:
+ br label %bb15
+; PROMO: %pgocount.promoted{{.*}} = load {{.*}} @__profc_foo{{.*}} 0)
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}0)
+; PROMO-NEXT: %pgocount.promoted{{.*}} = load {{.*}} @__profc_foo{{.*}} 1)
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}1)
+; PROMO-NEXT: %pgocount.promoted{{.*}} = load {{.*}} @__profc_foo{{.*}} 2)
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}2)
+; PROMO-NEXT: %pgocount{{.*}} = load {{.*}} @__profc_foo{{.*}} 4)
+; PROMO-NEXT: add
+; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}4)
+; PROMO-NOT: @__profc_foo
+
+
+bb15: ; preds = %bb14, %bb4
+ tail call void @bar(i32 1)
+ ret void
+}
+
+declare void @bar(i32) local_unnamed_addr
diff --git a/test/Transforms/PGOProfile/memop_size_from_strlen.ll b/test/Transforms/PGOProfile/memop_size_from_strlen.ll
new file mode 100644
index 0000000000000..527f328b2d2fe
--- /dev/null
+++ b/test/Transforms/PGOProfile/memop_size_from_strlen.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1)
+declare i32 @strlen(i8* nocapture)
+
+; CHECK-LABEL: test
+; CHECK: %1 = zext i32 %c to i64
+; CHECK: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_test, i32 0, i32 0), i64 12884901887, i64 %1, i32 1, i32 0)
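+;
+; The memcpy size comes from strlen, so pgo-instr-gen instruments it with
+; value profiling: the i32 count is zero-extended to i64 and passed to
+; llvm.instrprof.value.profile, as checked above.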
+
+define void @test(i8* %a, i8* %p) {
+ %c = call i32 @strlen(i8* %p)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %p, i32 %c, i32 1, i1 false)
+ ret void
+}
diff --git a/test/Transforms/Reassociate/fast-ReassociateVector.ll b/test/Transforms/Reassociate/fast-ReassociateVector.ll
index fb76b9d990b06..d4c7b809f6164 100644
--- a/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -205,15 +205,25 @@ define <2 x i32> @test16(<2 x i32> %x, <2 x i32> %y) {
ret <2 x i32> %tmp3
}
-; FIXME: Optimize vector xor. Currently only commute operands.
define <2 x i32> @test17(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: test17
-; CHECK-NEXT: %tmp1 = xor <2 x i32> %x, %y
-; CHECK-NEXT: %tmp2 = xor <2 x i32> %x, %y
-; CHECK-NEXT: %tmp3 = xor <2 x i32> %tmp1, %tmp2
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
%tmp1 = xor <2 x i32> %x, %y
%tmp2 = xor <2 x i32> %y, %x
%tmp3 = xor <2 x i32> %tmp1, %tmp2
ret <2 x i32> %tmp3
}
+
+define <2 x i32> @test18(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test18
+; CHECK-NEXT: %tmp5 = xor <2 x i32> %y, %x
+; CHECK-NEXT: ret <2 x i32> %tmp5
+
+ %tmp1 = xor <2 x i32> %x, %y
+ %tmp2 = xor <2 x i32> %y, %x
+ %tmp3 = xor <2 x i32> %x, %y
+ %tmp4 = xor <2 x i32> %tmp1, %tmp2
+ %tmp5 = xor <2 x i32> %tmp4, %tmp3
+ ret <2 x i32> %tmp5
+}
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
index 0bed6f3588080..4d617ea03101b 100644
--- a/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -19,6 +19,19 @@ define i32 @xor1(i32 %x) {
;CHECK: %xor = xor i32 %and.ra, 435
}
+; (x | c1) ^ (x | c2) => (x & c3) ^ c3, where c3 = c1^c2
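+; Here c1 = 123 and c2 = 456, so c3 = 123 ^ 456 = 435, which is what the
+; CHECK lines below expect in both lanes.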
+;
+define <2 x i32> @xor1_vec(<2 x i32> %x) {
+ %or = or <2 x i32> %x, <i32 123, i32 123>
+ %or1 = or <2 x i32> %x, <i32 456, i32 456>
+ %xor = xor <2 x i32> %or, %or1
+ ret <2 x i32> %xor
+
+;CHECK-LABEL: @xor1_vec(
+;CHECK: %and.ra = and <2 x i32> %x, <i32 435, i32 435>
+;CHECK: %xor = xor <2 x i32> %and.ra, <i32 435, i32 435>
+}
+
; Test rule : (x & c1) ^ (x & c2) = (x & (c1^c2))
; Real testing case : (x & 123) ^ y ^ (x & 345) => (x & 435) ^ y
define i32 @xor2(i32 %x, i32 %y) {
@@ -33,6 +46,20 @@ define i32 @xor2(i32 %x, i32 %y) {
;CHECK: %xor2 = xor i32 %and.ra, %y
}
+; Test rule : (x & c1) ^ (x & c2) = (x & (c1^c2))
+; Real testing case : (x & 123) ^ y ^ (x & 345) => (x & 435) ^ y
+define <2 x i32> @xor2_vec(<2 x i32> %x, <2 x i32> %y) {
+ %and = and <2 x i32> %x, <i32 123, i32 123>
+ %xor = xor <2 x i32> %and, %y
+ %and1 = and <2 x i32> %x, <i32 456, i32 456>
+ %xor2 = xor <2 x i32> %xor, %and1
+ ret <2 x i32> %xor2
+
+;CHECK-LABEL: @xor2_vec(
+;CHECK: %and.ra = and <2 x i32> %x, <i32 435, i32 435>
+;CHECK: %xor2 = xor <2 x i32> %and.ra, %y
+}
+
; Test rule: (x | c1) ^ (x & c2) = (x & c3) ^ c1, where c3 = ~c1 ^ c2
; c3 = ~c1 ^ c2
define i32 @xor3(i32 %x, i32 %y) {
@@ -48,6 +75,21 @@ define i32 @xor3(i32 %x, i32 %y) {
;CHECK: %xor1 = xor i32 %xor, %and.ra
}
+; Test rule: (x | c1) ^ (x & c2) = (x & c3) ^ c1, where c3 = ~c1 ^ c2
+; c3 = ~c1 ^ c2
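+; With c1 = 123 and c2 = 456: c3 = ~123 ^ 456 = ~(123 ^ 456) = ~435 = -436,
+; matching the CHECK lines below.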
+define <2 x i32> @xor3_vec(<2 x i32> %x, <2 x i32> %y) {
+ %or = or <2 x i32> %x, <i32 123, i32 123>
+ %xor = xor <2 x i32> %or, %y
+ %and = and <2 x i32> %x, <i32 456, i32 456>
+ %xor1 = xor <2 x i32> %xor, %and
+ ret <2 x i32> %xor1
+
+;CHECK-LABEL: @xor3_vec(
+;CHECK: %and.ra = and <2 x i32> %x, <i32 -436, i32 -436>
+;CHECK: %xor = xor <2 x i32> %y, <i32 123, i32 123>
+;CHECK: %xor1 = xor <2 x i32> %xor, %and.ra
+}
+
; Test rule: (x | c1) ^ c2 = (x & ~c1) ^ (c1 ^ c2)
define i32 @xor4(i32 %x, i32 %y) {
%and = and i32 %x, -124
@@ -60,6 +102,18 @@ define i32 @xor4(i32 %x, i32 %y) {
; CHECK: %xor1 = xor i32 %xor, %and
}
+; Test rule: (x | c1) ^ c2 = (x & ~c1) ^ (c1 ^ c2)
+define <2 x i32> @xor4_vec(<2 x i32> %x, <2 x i32> %y) {
+ %and = and <2 x i32> %x, <i32 -124, i32 -124>
+ %xor = xor <2 x i32> %y, <i32 435, i32 435>
+ %xor1 = xor <2 x i32> %xor, %and
+ ret <2 x i32> %xor1
+; CHECK-LABEL: @xor4_vec(
+; CHECK: %and = and <2 x i32> %x, <i32 -124, i32 -124>
+; CHECK: %xor = xor <2 x i32> %y, <i32 435, i32 435>
+; CHECK: %xor1 = xor <2 x i32> %xor, %and
+}
+
; ==========================================================================
;
; Xor reassociation special cases
@@ -80,6 +134,19 @@ define i32 @xor_special1(i32 %x, i32 %y) {
}
; Special case1:
+; (x | c1) ^ (x & ~c1) = c1
+define <2 x i32> @xor_special1_vec(<2 x i32> %x, <2 x i32> %y) {
+ %or = or <2 x i32> %x, <i32 123, i32 123>
+ %xor = xor <2 x i32> %or, %y
+ %and = and <2 x i32> %x, <i32 -124, i32 -124>
+ %xor1 = xor <2 x i32> %xor, %and
+ ret <2 x i32> %xor1
+; CHECK-LABEL: @xor_special1_vec(
+; CHECK: %xor1 = xor <2 x i32> %y, <i32 123, i32 123>
+; CHECK: ret <2 x i32> %xor1
+}
+
+; Special case1:
; (x | c1) ^ (x & c1) = x ^ c1
define i32 @xor_special2(i32 %x, i32 %y) {
%or = or i32 %x, 123
@@ -93,6 +160,20 @@ define i32 @xor_special2(i32 %x, i32 %y) {
; CHECK: ret i32 %xor1
}
+; Special case1:
+; (x | c1) ^ (x & c1) = x ^ c1
+define <2 x i32> @xor_special2_vec(<2 x i32> %x, <2 x i32> %y) {
+ %or = or <2 x i32> %x, <i32 123, i32 123>
+ %xor = xor <2 x i32> %or, %y
+ %and = and <2 x i32> %x, <i32 123, i32 123>
+ %xor1 = xor <2 x i32> %xor, %and
+ ret <2 x i32> %xor1
+; CHECK-LABEL: @xor_special2_vec(
+; CHECK: %xor = xor <2 x i32> %x, <i32 123, i32 123>
+; CHECK: %xor1 = xor <2 x i32> %xor, %y
+; CHECK: ret <2 x i32> %xor1
+}
+
; (x | c1) ^ (x | c1) => 0
define i32 @xor_special3(i32 %x) {
%or = or i32 %x, 123
@@ -103,6 +184,16 @@ define i32 @xor_special3(i32 %x) {
;CHECK: ret i32 0
}
+; (x | c1) ^ (x | c1) => 0
+define <2 x i32> @xor_special3_vec(<2 x i32> %x) {
+ %or = or <2 x i32> %x, <i32 123, i32 123>
+ %or1 = or <2 x i32> %x, <i32 123, i32 123>
+ %xor = xor <2 x i32> %or, %or1
+ ret <2 x i32> %xor
+;CHECK-LABEL: @xor_special3_vec(
+;CHECK: ret <2 x i32> zeroinitializer
+}
+
; (x & c1) ^ (x & c1) => 0
define i32 @xor_special4(i32 %x) {
%or = and i32 %x, 123
@@ -113,6 +204,16 @@ define i32 @xor_special4(i32 %x) {
;CHECK: ret i32 0
}
+; (x & c1) ^ (x & c1) => 0
+define <2 x i32> @xor_special4_vec(<2 x i32> %x) {
+ %or = and <2 x i32> %x, <i32 123, i32 123>
+ %or1 = and <2 x i32> <i32 123, i32 123>, %x
+ %xor = xor <2 x i32> %or, %or1
+ ret <2 x i32> %xor
+;CHECK-LABEL: @xor_special4_vec(
+;CHECK: ret <2 x i32> zeroinitializer
+}
+
; ==========================================================================
;
; Xor reassociation curtail code size
diff --git a/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll b/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll
new file mode 100644
index 0000000000000..53e306270ac53
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AMDGPU/packed-math.ll
@@ -0,0 +1,195 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer -dce < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer -dce < %s | FileCheck -check-prefixes=GCN,VI %s
+
+; FIXME: We would still like to vectorize the memory operations for VI.
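+;
+; gfx900 (GFX9) has packed 16-bit instructions, so pairs of scalar half
+; operations below are expected to be merged into <2 x half>; fiji (VI) lacks
+; packed math, so its checks only expect scalar loads.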
+
+; Simple 3-pair chain with loads and stores
+; GCN-LABEL: @test1_as_3_3_3_v2f16(
+; GFX9: load <2 x half>, <2 x half> addrspace(3)*
+; GFX9: load <2 x half>, <2 x half> addrspace(3)*
+; GFX9: fmul <2 x half>
+; GFX9: store <2 x half> %{{.*}}, <2 x half> addrspace(3)* %
+; GFX9: ret
+
+; VI: load half
+; VI: load half
+define amdgpu_kernel void @test1_as_3_3_3_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half addrspace(3)* %c) {
+ %i0 = load half, half addrspace(3)* %a, align 2
+ %i1 = load half, half addrspace(3)* %b, align 2
+ %mul = fmul half %i0, %i1
+ %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
+ %i3 = load half, half addrspace(3)* %arrayidx3, align 2
+ %arrayidx4 = getelementptr inbounds half, half addrspace(3)* %b, i64 1
+ %i4 = load half, half addrspace(3)* %arrayidx4, align 2
+ %mul5 = fmul half %i3, %i4
+ store half %mul, half addrspace(3)* %c, align 2
+ %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
+ store half %mul5, half addrspace(3)* %arrayidx5, align 2
+ ret void
+}
+
+; GCN-LABEL: @test1_as_3_0_0(
+; GFX9: load <2 x half>, <2 x half> addrspace(3)*
+; GFX9: load <2 x half>, <2 x half>*
+; GFX9: fmul <2 x half>
+; GFX9: store <2 x half> %{{.*}}, <2 x half>* %
+; GFX9: ret
+
+; VI: load half
+; VI: load half
+define amdgpu_kernel void @test1_as_3_0_0(half addrspace(3)* %a, half* %b, half* %c) {
+ %i0 = load half, half addrspace(3)* %a, align 2
+ %i1 = load half, half* %b, align 2
+ %mul = fmul half %i0, %i1
+ %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
+ %i3 = load half, half addrspace(3)* %arrayidx3, align 2
+ %arrayidx4 = getelementptr inbounds half, half* %b, i64 1
+ %i4 = load half, half* %arrayidx4, align 2
+ %mul5 = fmul half %i3, %i4
+ store half %mul, half* %c, align 2
+ %arrayidx5 = getelementptr inbounds half, half* %c, i64 1
+ store half %mul5, half* %arrayidx5, align 2
+ ret void
+}
+
+; GCN-LABEL: @test1_as_0_0_3_v2f16(
+; GFX9: load <2 x half>, <2 x half>*
+; GFX9: load <2 x half>, <2 x half>*
+; GFX9: fmul <2 x half>
+; GFX9: store <2 x half> %{{.*}}, <2 x half> addrspace(3)* %
+; GFX9: ret
+
+; VI: load half
+; VI: load half
+define amdgpu_kernel void @test1_as_0_0_3_v2f16(half* %a, half* %b, half addrspace(3)* %c) {
+ %i0 = load half, half* %a, align 2
+ %i1 = load half, half* %b, align 2
+ %mul = fmul half %i0, %i1
+ %arrayidx3 = getelementptr inbounds half, half* %a, i64 1
+ %i3 = load half, half* %arrayidx3, align 2
+ %arrayidx4 = getelementptr inbounds half, half* %b, i64 1
+ %i4 = load half, half* %arrayidx4, align 2
+ %mul5 = fmul half %i3, %i4
+ store half %mul, half addrspace(3)* %c, align 2
+ %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
+ store half %mul5, half addrspace(3)* %arrayidx5, align 2
+ ret void
+}
+
+; GCN-LABEL: @test1_fma_v2f16(
+; GFX9: load <2 x half>
+; GFX9: load <2 x half>
+; GFX9: load <2 x half>
+; GFX9: call <2 x half> @llvm.fma.v2f16(
+; GFX9: store <2 x half>
+define amdgpu_kernel void @test1_fma_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half addrspace(3)* %c, half addrspace(3)* %d) {
+ %i0 = load half, half addrspace(3)* %a, align 2
+ %i1 = load half, half addrspace(3)* %b, align 2
+ %i2 = load half, half addrspace(3)* %c, align 2
+ %fma0 = call half @llvm.fma.f16(half %i0, half %i1, half %i2)
+ %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
+ %i3 = load half, half addrspace(3)* %arrayidx3, align 2
+ %arrayidx4 = getelementptr inbounds half, half addrspace(3)* %b, i64 1
+ %i4 = load half, half addrspace(3)* %arrayidx4, align 2
+ %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
+ %i5 = load half, half addrspace(3)* %arrayidx5, align 2
+ %fma1 = call half @llvm.fma.f16(half %i3, half %i4, half %i5)
+ store half %fma0, half addrspace(3)* %d, align 2
+ %arrayidx6 = getelementptr inbounds half, half addrspace(3)* %d, i64 1
+ store half %fma1, half addrspace(3)* %arrayidx6, align 2
+ ret void
+}
+
+; GCN-LABEL: @mul_scalar_v2f16(
+; GFX9: load <2 x half>
+; GFX9: fmul <2 x half>
+; GFX9: store <2 x half>
+define amdgpu_kernel void @mul_scalar_v2f16(half addrspace(3)* %a, half %scalar, half addrspace(3)* %c) {
+ %i0 = load half, half addrspace(3)* %a, align 2
+ %mul = fmul half %i0, %scalar
+ %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
+ %i3 = load half, half addrspace(3)* %arrayidx3, align 2
+ %mul5 = fmul half %i3, %scalar
+ store half %mul, half addrspace(3)* %c, align 2
+ %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
+ store half %mul5, half addrspace(3)* %arrayidx5, align 2
+ ret void
+}
+
+; GCN-LABEL: @fabs_v2f16
+; GFX9: load <2 x half>
+; GFX9: call <2 x half> @llvm.fabs.v2f16(
+; GFX9: store <2 x half>
+define amdgpu_kernel void @fabs_v2f16(half addrspace(3)* %a, half addrspace(3)* %c) {
+ %i0 = load half, half addrspace(3)* %a, align 2
+ %fabs0 = call half @llvm.fabs.f16(half %i0)
+ %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
+ %i3 = load half, half addrspace(3)* %arrayidx3, align 2
+ %fabs1 = call half @llvm.fabs.f16(half %i3)
+ store half %fabs0, half addrspace(3)* %c, align 2
+ %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
+ store half %fabs1, half addrspace(3)* %arrayidx5, align 2
+ ret void
+}
+
+; GCN-LABEL: @test1_fabs_fma_v2f16(
+; GFX9: load <2 x half>
+; GFX9: call <2 x half> @llvm.fabs.v2f16(
+; GFX9: call <2 x half> @llvm.fma.v2f16(
+; GFX9: store <2 x half>
+define amdgpu_kernel void @test1_fabs_fma_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half addrspace(3)* %c, half addrspace(3)* %d) {
+ %i0 = load half, half addrspace(3)* %a, align 2
+ %i1 = load half, half addrspace(3)* %b, align 2
+ %i2 = load half, half addrspace(3)* %c, align 2
+ %i0.fabs = call half @llvm.fabs.f16(half %i0)
+
+ %fma0 = call half @llvm.fma.f16(half %i0.fabs, half %i1, half %i2)
+ %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
+ %i3 = load half, half addrspace(3)* %arrayidx3, align 2
+ %arrayidx4 = getelementptr inbounds half, half addrspace(3)* %b, i64 1
+ %i4 = load half, half addrspace(3)* %arrayidx4, align 2
+ %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
+ %i5 = load half, half addrspace(3)* %arrayidx5, align 2
+ %i3.fabs = call half @llvm.fabs.f16(half %i3)
+
+ %fma1 = call half @llvm.fma.f16(half %i3.fabs, half %i4, half %i5)
+ store half %fma0, half addrspace(3)* %d, align 2
+ %arrayidx6 = getelementptr inbounds half, half addrspace(3)* %d, i64 1
+ store half %fma1, half addrspace(3)* %arrayidx6, align 2
+ ret void
+}
+
+; FIXME: Should do a vector load and extract the component for fabs
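+; For now the loads from %b remain scalar, since %i1 is consumed by a
+; scalar fabs call, as the GFX9 lines below reflect.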
+; GCN-LABEL: @test1_fabs_scalar_fma_v2f16(
+; GFX9: load half
+; GFX9: call half @llvm.fabs.f16(
+; GFX9: load <2 x half>
+; GFX9: load half
+; GFX9: load <2 x half>
+; GFX9: call <2 x half> @llvm.fma.v2f16(
+; GFX9: store <2 x half>
+define amdgpu_kernel void @test1_fabs_scalar_fma_v2f16(half addrspace(3)* %a, half addrspace(3)* %b, half addrspace(3)* %c, half addrspace(3)* %d) {
+ %i0 = load half, half addrspace(3)* %a, align 2
+ %i1 = load half, half addrspace(3)* %b, align 2
+ %i2 = load half, half addrspace(3)* %c, align 2
+ %i1.fabs = call half @llvm.fabs.f16(half %i1)
+
+ %fma0 = call half @llvm.fma.f16(half %i0, half %i1.fabs, half %i2)
+ %arrayidx3 = getelementptr inbounds half, half addrspace(3)* %a, i64 1
+ %i3 = load half, half addrspace(3)* %arrayidx3, align 2
+ %arrayidx4 = getelementptr inbounds half, half addrspace(3)* %b, i64 1
+ %i4 = load half, half addrspace(3)* %arrayidx4, align 2
+ %arrayidx5 = getelementptr inbounds half, half addrspace(3)* %c, i64 1
+ %i5 = load half, half addrspace(3)* %arrayidx5, align 2
+ %fma1 = call half @llvm.fma.f16(half %i3, half %i4, half %i5)
+ store half %fma0, half addrspace(3)* %d, align 2
+ %arrayidx6 = getelementptr inbounds half, half addrspace(3)* %d, i64 1
+ store half %fma1, half addrspace(3)* %arrayidx6, align 2
+ ret void
+}
+
+declare half @llvm.fabs.f16(half) #1
+declare half @llvm.fma.f16(half, half, half) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll b/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll
deleted file mode 100644
index 63c6d77954d82..0000000000000
--- a/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -slp-vectorizer -dce < %s | FileCheck %s
-; XFAIL: *
-;
-; FIXME: If this test expects to be vectorized, the TTI must indicate that the target
-; has vector registers of the expected width.
-; Currently, it says there are 8 vector registers that are 32-bits wide.
-
-target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
-
-
-; Simple 3-pair chain with loads and stores
-define amdgpu_kernel void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) {
-; CHECK-LABEL: @test1_as_3_3_3(
-; CHECK: load <2 x double>, <2 x double> addrspace(3)*
-; CHECK: load <2 x double>, <2 x double> addrspace(3)*
-; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
-; CHECK: ret
- %i0 = load double, double addrspace(3)* %a, align 8
- %i1 = load double, double addrspace(3)* %b, align 8
- %mul = fmul double %i0, %i1
- %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1
- %i3 = load double, double addrspace(3)* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double addrspace(3)* %b, i64 1
- %i4 = load double, double addrspace(3)* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4
- store double %mul, double addrspace(3)* %c, align 8
- %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1
- store double %mul5, double addrspace(3)* %arrayidx5, align 8
- ret void
-}
-
-define amdgpu_kernel void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) {
-; CHECK-LABEL: @test1_as_3_0_0(
-; CHECK: load <2 x double>, <2 x double> addrspace(3)*
-; CHECK: load <2 x double>, <2 x double>*
-; CHECK: store <2 x double> %{{.*}}, <2 x double>* %
-; CHECK: ret
- %i0 = load double, double addrspace(3)* %a, align 8
- %i1 = load double, double* %b, align 8
- %mul = fmul double %i0, %i1
- %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1
- %i3 = load double, double addrspace(3)* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
- %i4 = load double, double* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4
- store double %mul, double* %c, align 8
- %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
- store double %mul5, double* %arrayidx5, align 8
- ret void
-}
-
-define amdgpu_kernel void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) {
-; CHECK-LABEL: @test1_as_0_0_3(
-; CHECK: load <2 x double>, <2 x double>*
-; CHECK: load <2 x double>, <2 x double>*
-; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
-; CHECK: ret
- %i0 = load double, double* %a, align 8
- %i1 = load double, double* %b, align 8
- %mul = fmul double %i0, %i1
- %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
- %i3 = load double, double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
- %i4 = load double, double* %arrayidx4, align 8
- %mul5 = fmul double %i3, %i4
- store double %mul, double addrspace(3)* %c, align 8
- %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1
- store double %mul5, double addrspace(3)* %arrayidx5, align 8
- ret void
-}
diff --git a/test/Transforms/SROA/non-integral-pointers.ll b/test/Transforms/SROA/non-integral-pointers.ll
new file mode 100644
index 0000000000000..63286309f6f31
--- /dev/null
+++ b/test/Transforms/SROA/non-integral-pointers.ll
@@ -0,0 +1,46 @@
+; RUN: opt -sroa -S < %s | FileCheck %s
+
+; This test checks that SROA does not introduce ptrtoint or inttoptr
+; casts to or from non-integral pointers. The "ni:4" component of the
+; datalayout declares that pointers in address space 4 are
+; "non-integral", i.e. their bitwise representation is unspecified.
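+;
+; For example, rewriting the load in @f0 below as
+;   %int = load i64, i64* %loc
+;   %ptr = inttoptr i64 %int to i8 addrspace(4)*
+; would be invalid, because inttoptr is not meaningful for
+; non-integral pointers.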
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f0(i1 %alwaysFalse, i64 %val) {
+; CHECK-LABEL: @f0(
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+entry:
+ %loc = alloca i64
+ store i64 %val, i64* %loc
+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+neverTaken:
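+ ; Promoting %loc would force SROA to rewrite this load as an inttoptr
+ ; of %val, which the CHECK-NOT lines above forbid.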
+ %loc.bc = bitcast i64* %loc to i8 addrspace(4)**
+ %ptr = load i8 addrspace(4)*, i8 addrspace(4)** %loc.bc
+ store i8 5, i8 addrspace(4)* %ptr
+ ret void
+
+alwaysTaken:
+ ret void
+}
+
+define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val) {
+; CHECK-LABEL: @f1(
+; CHECK-NOT: inttoptr
+; CHECK-NOT: ptrtoint
+entry:
+ %loc = alloca i8 addrspace(4)*
+ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+neverTaken:
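+ ; Likewise, promoting %loc here would require a ptrtoint of %val.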
+ %loc.bc = bitcast i8 addrspace(4)** %loc to i64*
+ %int = load i64, i64* %loc.bc
+ ret i64 %int
+
+alwaysTaken:
+ ret i64 42
+}
diff --git a/test/Transforms/SampleProfile/Inputs/einline.prof b/test/Transforms/SampleProfile/Inputs/einline.prof
index 90f41d21ca3ba..df0275b50051d 100644
--- a/test/Transforms/SampleProfile/Inputs/einline.prof
+++ b/test/Transforms/SampleProfile/Inputs/einline.prof
@@ -1,3 +1,6 @@
_Z3foov:200:100
1: _Z3barv:0
3: _Z3barv:100
+recursive:200:100
+ 1: recursive:100
+ 2: recursive:100
diff --git a/test/Transforms/SampleProfile/early-inline.ll b/test/Transforms/SampleProfile/early-inline.ll
index 51e7d243c187d..a240635b2b8d3 100644
--- a/test/Transforms/SampleProfile/early-inline.ll
+++ b/test/Transforms/SampleProfile/early-inline.ll
@@ -32,6 +32,19 @@ define internal void @_ZL3barv() !dbg !12 {
ret void
}
+; CHECK-LABEL: @recursive
+define void @recursive() !dbg !13 {
+; Recursive calls should not be early-inlined.
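+; The CHECK-NOT bounds verify that exactly two recursive calls remain.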
+; CHECK-NOT: call void @recursive
+; CHECK: call void @recursive
+; CHECK: call void @recursive
+; CHECK-NOT: call void @recursive
+; CHECK: ret
+ call void @recursive(), !dbg !14
+ call void @recursive(), !dbg !15
+ ret void
+}
+
declare i32 @__gxx_personality_v0(...)
!llvm.dbg.cu = !{!0}
@@ -46,3 +59,6 @@ declare i32 @__gxx_personality_v0(...)
!10 = !DILocation(line: 8, column: 5, scope: !11)
!11 = distinct !DILexicalBlock(scope: !6, file: !1, line: 7, column: 7)
!12 = distinct !DISubprogram(linkageName: "_ZL3barv", scope: !1, file: !1, line: 20, scopeLine: 20, unit: !0)
+!13 = distinct !DISubprogram(linkageName: "recursive", scope: !1, file: !1, line: 20, scopeLine: 20, unit: !0)
+!14 = !DILocation(line: 21, column: 3, scope: !13)
+!15 = !DILocation(line: 22, column: 3, scope: !13)
diff --git a/test/Transforms/TailCallElim/reorder_load.ll b/test/Transforms/TailCallElim/reorder_load.ll
index 2f9b692d09914..78621b105f5b1 100644
--- a/test/Transforms/TailCallElim/reorder_load.ll
+++ b/test/Transforms/TailCallElim/reorder_load.ll
@@ -7,6 +7,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; then eliminate the tail recursion.
+
@global = external global i32 ; <i32*> [#uses=1]
@extern_weak_global = extern_weak global i32 ; <i32*> [#uses=1]
@@ -145,3 +146,29 @@ else: ; preds = %entry
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
ret i32 %tmp10
}
+
+; This load can be moved above the call: the call does not write to the
+; memory the load accesses, and the load is safe to speculate.
+define fastcc i32 @raise_load_6(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind {
+; CHECK-LABEL: @raise_load_6(
+; CHECK-NOT: call
+; CHECK: load i32, i32*
+; CHECK-NOT: call
+; CHECK: }
+entry:
+ %s = alloca i32
+ store i32 4, i32* %s
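+ ; %s is a local alloca that is never passed to the recursive call, so
+ ; the callee cannot modify it.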
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
+
+if: ; preds = %entry
+ store i32 1, i32* %a_arg
+ ret i32 0
+
+else: ; preds = %entry
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @raise_load_6(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32, i32* %s ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
+}