summaryrefslogtreecommitdiff
path: root/test/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'test/Transforms')
-rw-r--r--test/Transforms/CodeGenPrepare/X86/memcmp.ll1635
-rw-r--r--test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll35
-rw-r--r--test/Transforms/EarlyCSE/globalsaa-memoryssa.ll25
-rw-r--r--test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll79
-rw-r--r--test/Transforms/GVN/PRE/phi-translate.ll4
-rw-r--r--test/Transforms/GlobalOpt/pr33686.ll17
-rw-r--r--test/Transforms/IRCE/eq_ne.ll257
-rw-r--r--test/Transforms/IRCE/pre_post_loops.ll117
-rw-r--r--test/Transforms/Inline/AArch64/ext.ll249
-rw-r--r--test/Transforms/Inline/PowerPC/ext.ll140
-rw-r--r--test/Transforms/Inline/PowerPC/lit.local.cfg3
-rw-r--r--test/Transforms/Inline/X86/ext.ll201
-rw-r--r--test/Transforms/InstCombine/2017-07-07-UMul-ZExt.ll24
-rw-r--r--test/Transforms/InstCombine/and-not-or.ll34
-rw-r--r--test/Transforms/InstCombine/and.ll192
-rw-r--r--test/Transforms/InstCombine/and2.ll85
-rw-r--r--test/Transforms/InstCombine/element-atomic-memintrins.ll98
-rw-r--r--test/Transforms/InstCombine/icmp-logical.ll165
-rw-r--r--test/Transforms/InstCombine/or-xor.ll28
-rw-r--r--test/Transforms/InstCombine/or.ll291
-rw-r--r--test/Transforms/InstCombine/pr33765.ll32
-rw-r--r--test/Transforms/JumpThreading/select.ll77
-rw-r--r--test/Transforms/LoopInterchange/current-limitations-lcssa.ll76
-rw-r--r--test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll118
-rw-r--r--test/Transforms/LoopInterchange/interchange-not-profitable.ll66
-rw-r--r--test/Transforms/LoopInterchange/interchange-output-dependencies.ll86
-rw-r--r--test/Transforms/LoopInterchange/interchange-simple-count-down.ll69
-rw-r--r--test/Transforms/LoopInterchange/interchange-simple-count-up.ll86
-rw-r--r--test/Transforms/LoopInterchange/interchange.ll749
-rw-r--r--test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll220
-rw-r--r--test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll64
-rw-r--r--test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll87
-rw-r--r--test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll143
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll126
-rw-r--r--test/Transforms/LoopVectorize/X86/float-induction-x86.ll6
-rw-r--r--test/Transforms/LoopVectorize/debugloc.ll2
-rw-r--r--test/Transforms/LoopVectorize/first-order-recurrence.ll8
-rw-r--r--test/Transforms/LoopVectorize/float-induction.ll14
-rw-r--r--test/Transforms/LoopVectorize/if-conversion-nest.ll25
-rw-r--r--test/Transforms/LoopVectorize/induction-step.ll4
-rw-r--r--test/Transforms/LoopVectorize/induction.ll4
-rw-r--r--test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll6
-rw-r--r--test/Transforms/LoopVectorize/interleaved-accesses.ll10
-rw-r--r--test/Transforms/LoopVectorize/iv_outside_user.ll2
-rw-r--r--test/Transforms/LoopVectorize/miniters.ll4
-rw-r--r--test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll240
-rw-r--r--test/Transforms/LoopVectorize/runtime-check-readonly.ll1
-rw-r--r--test/Transforms/LoopVectorize/runtime-check.ll2
48 files changed, 4907 insertions, 1099 deletions
diff --git a/test/Transforms/CodeGenPrepare/X86/memcmp.ll b/test/Transforms/CodeGenPrepare/X86/memcmp.ll
index 4b9e7c3956f58..1dfc087619653 100644
--- a/test/Transforms/CodeGenPrepare/X86/memcmp.ll
+++ b/test/Transforms/CodeGenPrepare/X86/memcmp.ll
@@ -23,9 +23,63 @@ define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp3(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 3)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp3(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; X32-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32
+; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
+; X32-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]]
+; X32-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32: res_block:
+; X32-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]]
+; X32-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[X]], i8 2
+; X32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 2
+; X32-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
+; X32-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
+; X32-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
+; X32-NEXT: [[TMP16:%.*]] = zext i8 [[TMP14]] to i32
+; X32-NEXT: [[TMP17:%.*]] = sub i32 [[TMP15]], [[TMP16]]
+; X32-NEXT: br label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP17]], [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp3(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; X64-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i64
+; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i64
+; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[X]], i8 2
+; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 2
+; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
+; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
+; X64-NEXT: [[TMP16:%.*]] = zext i8 [[TMP14]] to i32
+; X64-NEXT: [[TMP17:%.*]] = sub i32 [[TMP15]], [[TMP16]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP17]], [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
ret i32 %call
@@ -50,27 +104,225 @@ define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp5(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 5)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp5(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32: res_block:
+; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; X32-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]]
+; X32-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
+; X32-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X32-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
+; X32-NEXT: [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+; X32-NEXT: br label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp5(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
+; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
+; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
+; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
+; X64-NEXT: [[TMP16:%.*]] = zext i8 [[TMP14]] to i32
+; X64-NEXT: [[TMP17:%.*]] = sub i32 [[TMP15]], [[TMP16]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP17]], [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
ret i32 %call
}
define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp6(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 6)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp6(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32: res_block:
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i16*
+; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i16*
+; X32-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 2
+; X32-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2
+; X32-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP11]]
+; X32-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
+; X32-NEXT: [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]])
+; X32-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
+; X32-NEXT: [[TMP17]] = zext i16 [[TMP15]] to i32
+; X32-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
+; X32-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP17]], [[TMP18]]
+; X32-NEXT: br i1 [[TMP19]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp6(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
+; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
+; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP6]], [[LOADBB:%.*]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP7]], [[LOADBB]] ], [ [[TMP20:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[X]] to i16*
+; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[Y]] to i16*
+; X64-NEXT: [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 2
+; X64-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 2
+; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
+; X64-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]]
+; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
+; X64-NEXT: [[TMP18:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP16]])
+; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64
+; X64-NEXT: [[TMP20]] = zext i16 [[TMP18]] to i64
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP10]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
ret i32 %call
}
define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp7(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 7)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp7(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32: res_block:
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i16*
+; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i16*
+; X32-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 2
+; X32-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 2
+; X32-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP11]]
+; X32-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
+; X32-NEXT: [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]])
+; X32-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
+; X32-NEXT: [[TMP17]] = zext i16 [[TMP15]] to i32
+; X32-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
+; X32-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP17]], [[TMP18]]
+; X32-NEXT: br i1 [[TMP19]], label [[LOADBB2:%.*]], label [[RES_BLOCK]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[X]], i8 6
+; X32-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[Y]], i8 6
+; X32-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP20]]
+; X32-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP21]]
+; X32-NEXT: [[TMP24:%.*]] = zext i8 [[TMP22]] to i32
+; X32-NEXT: [[TMP25:%.*]] = zext i8 [[TMP23]] to i32
+; X32-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP25]]
+; X32-NEXT: br label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP26]], [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp7(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64
+; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
+; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]]
+; X64-NEXT: br i1 [[TMP8]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP6]], [[LOADBB:%.*]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP7]], [[LOADBB]] ], [ [[TMP20:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[X]] to i16*
+; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[Y]] to i16*
+; X64-NEXT: [[TMP13:%.*]] = getelementptr i16, i16* [[TMP11]], i16 2
+; X64-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 2
+; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
+; X64-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]]
+; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
+; X64-NEXT: [[TMP18:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP16]])
+; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64
+; X64-NEXT: [[TMP20]] = zext i16 [[TMP18]] to i64
+; X64-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP19]], [[TMP20]]
+; X64-NEXT: br i1 [[TMP21]], label [[LOADBB2:%.*]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[X]], i8 6
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i8, i8* [[Y]], i8 6
+; X64-NEXT: [[TMP24:%.*]] = load i8, i8* [[TMP22]]
+; X64-NEXT: [[TMP25:%.*]] = load i8, i8* [[TMP23]]
+; X64-NEXT: [[TMP26:%.*]] = zext i8 [[TMP24]] to i32
+; X64-NEXT: [[TMP27:%.*]] = zext i8 [[TMP25]] to i32
+; X64-NEXT: [[TMP28:%.*]] = sub i32 [[TMP26]], [[TMP27]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP28]], [[LOADBB2]] ], [ [[TMP10]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
ret i32 %call
@@ -78,8 +330,35 @@ define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp8(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 8)
-; X32-NEXT: ret i32 [[CALL]]
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32: res_block:
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1
+; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1
+; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]]
+; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
+; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]]
+; X32-NEXT: br i1 [[TMP17]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X32-NEXT: ret i32 [[PHI_RES]]
;
; X64-LABEL: @cmp8(
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
@@ -99,72 +378,691 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp9(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 9)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp9(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32: res_block:
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ]
+; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1
+; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1
+; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]]
+; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
+; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]]
+; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2:%.*]], label [[RES_BLOCK]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X32-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X32-NEXT: [[TMP20:%.*]] = load i8, i8* [[TMP18]]
+; X32-NEXT: [[TMP21:%.*]] = load i8, i8* [[TMP19]]
+; X32-NEXT: [[TMP22:%.*]] = zext i8 [[TMP20]] to i32
+; X32-NEXT: [[TMP23:%.*]] = zext i8 [[TMP21]] to i32
+; X32-NEXT: [[TMP24:%.*]] = sub i32 [[TMP22]], [[TMP23]]
+; X32-NEXT: br label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP24]], [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp9(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: res_block:
+; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X64-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]]
+; X64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
+; X64-NEXT: [[TMP13:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
+; X64-NEXT: [[TMP15:%.*]] = sub i32 [[TMP13]], [[TMP14]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP15]], [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
ret i32 %call
}
define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp10(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 10)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp10(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32: res_block:
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ], [ [[TMP26:%.*]], [[LOADBB2:%.*]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ], [ [[TMP27:%.*]], [[LOADBB2]] ]
+; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1
+; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1
+; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]]
+; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
+; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]]
+; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP18:%.*]] = bitcast i8* [[X]] to i16*
+; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[Y]] to i16*
+; X32-NEXT: [[TMP20:%.*]] = getelementptr i16, i16* [[TMP18]], i16 4
+; X32-NEXT: [[TMP21:%.*]] = getelementptr i16, i16* [[TMP19]], i16 4
+; X32-NEXT: [[TMP22:%.*]] = load i16, i16* [[TMP20]]
+; X32-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP21]]
+; X32-NEXT: [[TMP24:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP22]])
+; X32-NEXT: [[TMP25:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP23]])
+; X32-NEXT: [[TMP26]] = zext i16 [[TMP24]] to i32
+; X32-NEXT: [[TMP27]] = zext i16 [[TMP25]] to i32
+; X32-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X32-NEXT: br i1 [[TMP28]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp10(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i16*
+; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i16*
+; X64-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 4
+; X64-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 4
+; X64-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
+; X64-NEXT: [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]])
+; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
+; X64-NEXT: [[TMP17]] = zext i16 [[TMP15]] to i64
+; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64
+; X64-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], [[TMP18]]
+; X64-NEXT: br i1 [[TMP19]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
ret i32 %call
}
define i32 @cmp11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp11(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp11(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32: res_block:
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ], [ [[TMP26:%.*]], [[LOADBB2:%.*]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ], [ [[TMP27:%.*]], [[LOADBB2]] ]
+; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1
+; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1
+; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]]
+; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
+; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]]
+; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP18:%.*]] = bitcast i8* [[X]] to i16*
+; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[Y]] to i16*
+; X32-NEXT: [[TMP20:%.*]] = getelementptr i16, i16* [[TMP18]], i16 4
+; X32-NEXT: [[TMP21:%.*]] = getelementptr i16, i16* [[TMP19]], i16 4
+; X32-NEXT: [[TMP22:%.*]] = load i16, i16* [[TMP20]]
+; X32-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP21]]
+; X32-NEXT: [[TMP24:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP22]])
+; X32-NEXT: [[TMP25:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP23]])
+; X32-NEXT: [[TMP26]] = zext i16 [[TMP24]] to i32
+; X32-NEXT: [[TMP27]] = zext i16 [[TMP25]] to i32
+; X32-NEXT: [[TMP28:%.*]] = icmp eq i32 [[TMP26]], [[TMP27]]
+; X32-NEXT: br i1 [[TMP28]], label [[LOADBB3:%.*]], label [[RES_BLOCK]]
+; X32: loadbb3:
+; X32-NEXT: [[TMP29:%.*]] = getelementptr i8, i8* [[X]], i8 10
+; X32-NEXT: [[TMP30:%.*]] = getelementptr i8, i8* [[Y]], i8 10
+; X32-NEXT: [[TMP31:%.*]] = load i8, i8* [[TMP29]]
+; X32-NEXT: [[TMP32:%.*]] = load i8, i8* [[TMP30]]
+; X32-NEXT: [[TMP33:%.*]] = zext i8 [[TMP31]] to i32
+; X32-NEXT: [[TMP34:%.*]] = zext i8 [[TMP32]] to i32
+; X32-NEXT: [[TMP35:%.*]] = sub i32 [[TMP33]], [[TMP34]]
+; X32-NEXT: br label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP35]], [[LOADBB3]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp11(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i16*
+; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i16*
+; X64-NEXT: [[TMP11:%.*]] = getelementptr i16, i16* [[TMP9]], i16 4
+; X64-NEXT: [[TMP12:%.*]] = getelementptr i16, i16* [[TMP10]], i16 4
+; X64-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
+; X64-NEXT: [[TMP15:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP13]])
+; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
+; X64-NEXT: [[TMP17]] = zext i16 [[TMP15]] to i64
+; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64
+; X64-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], [[TMP18]]
+; X64-NEXT: br i1 [[TMP19]], label [[LOADBB2:%.*]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[X]], i8 10
+; X64-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[Y]], i8 10
+; X64-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP20]]
+; X64-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP21]]
+; X64-NEXT: [[TMP24:%.*]] = zext i8 [[TMP22]] to i32
+; X64-NEXT: [[TMP25:%.*]] = zext i8 [[TMP23]] to i32
+; X64-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP25]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP26]], [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
ret i32 %call
}
define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp12(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 12)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp12(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32: res_block:
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ], [ [[TMP24:%.*]], [[LOADBB2:%.*]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ], [ [[TMP25:%.*]], [[LOADBB2]] ]
+; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1
+; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1
+; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]]
+; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
+; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]]
+; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP18:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[TMP18]], i32 2
+; X32-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP19]], i32 2
+; X32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP20]]
+; X32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]]
+; X32-NEXT: [[TMP24]] = call i32 @llvm.bswap.i32(i32 [[TMP22]])
+; X32-NEXT: [[TMP25]] = call i32 @llvm.bswap.i32(i32 [[TMP23]])
+; X32-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP24]], [[TMP25]]
+; X32-NEXT: br i1 [[TMP26]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp12(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32*
+; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32*
+; X64-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 2
+; X64-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 2
+; X64-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X64-NEXT: [[TMP15:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
+; X64-NEXT: [[TMP17]] = zext i32 [[TMP15]] to i64
+; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64
+; X64-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], [[TMP18]]
+; X64-NEXT: br i1 [[TMP19]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
ret i32 %call
}
define i32 @cmp13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp13(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp13(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32: res_block:
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ], [ [[TMP24:%.*]], [[LOADBB2:%.*]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ], [ [[TMP25:%.*]], [[LOADBB2]] ]
+; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1
+; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1
+; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]]
+; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
+; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]]
+; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP18:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[TMP18]], i32 2
+; X32-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP19]], i32 2
+; X32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP20]]
+; X32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]]
+; X32-NEXT: [[TMP24]] = call i32 @llvm.bswap.i32(i32 [[TMP22]])
+; X32-NEXT: [[TMP25]] = call i32 @llvm.bswap.i32(i32 [[TMP23]])
+; X32-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP24]], [[TMP25]]
+; X32-NEXT: br i1 [[TMP26]], label [[LOADBB3:%.*]], label [[RES_BLOCK]]
+; X32: loadbb3:
+; X32-NEXT: [[TMP27:%.*]] = getelementptr i8, i8* [[X]], i8 12
+; X32-NEXT: [[TMP28:%.*]] = getelementptr i8, i8* [[Y]], i8 12
+; X32-NEXT: [[TMP29:%.*]] = load i8, i8* [[TMP27]]
+; X32-NEXT: [[TMP30:%.*]] = load i8, i8* [[TMP28]]
+; X32-NEXT: [[TMP31:%.*]] = zext i8 [[TMP29]] to i32
+; X32-NEXT: [[TMP32:%.*]] = zext i8 [[TMP30]] to i32
+; X32-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP32]]
+; X32-NEXT: br label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP33]], [[LOADBB3]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp13(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32*
+; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32*
+; X64-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 2
+; X64-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 2
+; X64-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X64-NEXT: [[TMP15:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
+; X64-NEXT: [[TMP17]] = zext i32 [[TMP15]] to i64
+; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64
+; X64-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], [[TMP18]]
+; X64-NEXT: br i1 [[TMP19]], label [[LOADBB2:%.*]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[X]], i8 12
+; X64-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[Y]], i8 12
+; X64-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP20]]
+; X64-NEXT: [[TMP23:%.*]] = load i8, i8* [[TMP21]]
+; X64-NEXT: [[TMP24:%.*]] = zext i8 [[TMP22]] to i32
+; X64-NEXT: [[TMP25:%.*]] = zext i8 [[TMP23]] to i32
+; X64-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP25]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP26]], [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
ret i32 %call
}
define i32 @cmp14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp14(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp14(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32: res_block:
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ], [ [[TMP24:%.*]], [[LOADBB2:%.*]] ], [ [[TMP35:%.*]], [[LOADBB3:%.*]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ], [ [[TMP25:%.*]], [[LOADBB2]] ], [ [[TMP36:%.*]], [[LOADBB3]] ]
+; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1
+; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1
+; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]]
+; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
+; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]]
+; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP18:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[TMP18]], i32 2
+; X32-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP19]], i32 2
+; X32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP20]]
+; X32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]]
+; X32-NEXT: [[TMP24]] = call i32 @llvm.bswap.i32(i32 [[TMP22]])
+; X32-NEXT: [[TMP25]] = call i32 @llvm.bswap.i32(i32 [[TMP23]])
+; X32-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP24]], [[TMP25]]
+; X32-NEXT: br i1 [[TMP26]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X32: loadbb3:
+; X32-NEXT: [[TMP27:%.*]] = bitcast i8* [[X]] to i16*
+; X32-NEXT: [[TMP28:%.*]] = bitcast i8* [[Y]] to i16*
+; X32-NEXT: [[TMP29:%.*]] = getelementptr i16, i16* [[TMP27]], i16 6
+; X32-NEXT: [[TMP30:%.*]] = getelementptr i16, i16* [[TMP28]], i16 6
+; X32-NEXT: [[TMP31:%.*]] = load i16, i16* [[TMP29]]
+; X32-NEXT: [[TMP32:%.*]] = load i16, i16* [[TMP30]]
+; X32-NEXT: [[TMP33:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP31]])
+; X32-NEXT: [[TMP34:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP32]])
+; X32-NEXT: [[TMP35]] = zext i16 [[TMP33]] to i32
+; X32-NEXT: [[TMP36]] = zext i16 [[TMP34]] to i32
+; X32-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP35]], [[TMP36]]
+; X32-NEXT: br i1 [[TMP37]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp14(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ], [ [[TMP28:%.*]], [[LOADBB2:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ], [ [[TMP29:%.*]], [[LOADBB2]] ]
+; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32*
+; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32*
+; X64-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 2
+; X64-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 2
+; X64-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X64-NEXT: [[TMP15:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
+; X64-NEXT: [[TMP17]] = zext i32 [[TMP15]] to i64
+; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64
+; X64-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], [[TMP18]]
+; X64-NEXT: br i1 [[TMP19]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP20:%.*]] = bitcast i8* [[X]] to i16*
+; X64-NEXT: [[TMP21:%.*]] = bitcast i8* [[Y]] to i16*
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i16, i16* [[TMP20]], i16 6
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i16, i16* [[TMP21]], i16 6
+; X64-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP22]]
+; X64-NEXT: [[TMP25:%.*]] = load i16, i16* [[TMP23]]
+; X64-NEXT: [[TMP26:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP24]])
+; X64-NEXT: [[TMP27:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP25]])
+; X64-NEXT: [[TMP28]] = zext i16 [[TMP26]] to i64
+; X64-NEXT: [[TMP29]] = zext i16 [[TMP27]] to i64
+; X64-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP28]], [[TMP29]]
+; X64-NEXT: br i1 [[TMP30]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
ret i32 %call
}
define i32 @cmp15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp15(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp15(
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15)
+; X32-NEXT: ret i32 [[CALL]]
+;
+; X64-LABEL: @cmp15(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP17:%.*]], [[LOADBB1]] ], [ [[TMP28:%.*]], [[LOADBB2:%.*]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1]] ], [ [[TMP29:%.*]], [[LOADBB2]] ]
+; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32*
+; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32*
+; X64-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 2
+; X64-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 2
+; X64-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X64-NEXT: [[TMP15:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
+; X64-NEXT: [[TMP17]] = zext i32 [[TMP15]] to i64
+; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64
+; X64-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP17]], [[TMP18]]
+; X64-NEXT: br i1 [[TMP19]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP20:%.*]] = bitcast i8* [[X]] to i16*
+; X64-NEXT: [[TMP21:%.*]] = bitcast i8* [[Y]] to i16*
+; X64-NEXT: [[TMP22:%.*]] = getelementptr i16, i16* [[TMP20]], i16 6
+; X64-NEXT: [[TMP23:%.*]] = getelementptr i16, i16* [[TMP21]], i16 6
+; X64-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP22]]
+; X64-NEXT: [[TMP25:%.*]] = load i16, i16* [[TMP23]]
+; X64-NEXT: [[TMP26:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP24]])
+; X64-NEXT: [[TMP27:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP25]])
+; X64-NEXT: [[TMP28]] = zext i16 [[TMP26]] to i64
+; X64-NEXT: [[TMP29]] = zext i16 [[TMP27]] to i64
+; X64-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP28]], [[TMP29]]
+; X64-NEXT: br i1 [[TMP30]], label [[LOADBB3:%.*]], label [[RES_BLOCK]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP31:%.*]] = getelementptr i8, i8* [[X]], i8 14
+; X64-NEXT: [[TMP32:%.*]] = getelementptr i8, i8* [[Y]], i8 14
+; X64-NEXT: [[TMP33:%.*]] = load i8, i8* [[TMP31]]
+; X64-NEXT: [[TMP34:%.*]] = load i8, i8* [[TMP32]]
+; X64-NEXT: [[TMP35:%.*]] = zext i8 [[TMP33]] to i32
+; X64-NEXT: [[TMP36:%.*]] = zext i8 [[TMP34]] to i32
+; X64-NEXT: [[TMP37:%.*]] = sub i32 [[TMP35]], [[TMP36]]
+; X64-NEXT: br label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP37]], [[LOADBB3]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
ret i32 %call
}
define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp16(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 16)
-; ALL-NEXT: ret i32 [[CALL]]
+; X32-LABEL: @cmp16(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
+; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; X32-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X32: res_block:
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ], [ [[TMP24:%.*]], [[LOADBB2:%.*]] ], [ [[TMP33:%.*]], [[LOADBB3:%.*]] ]
+; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ], [ [[TMP25:%.*]], [[LOADBB2]] ], [ [[TMP34:%.*]], [[LOADBB3]] ]
+; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP11:%.*]] = getelementptr i32, i32* [[TMP9]], i32 1
+; X32-NEXT: [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 1
+; X32-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP11]]
+; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
+; X32-NEXT: [[TMP15]] = call i32 @llvm.bswap.i32(i32 [[TMP13]])
+; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
+; X32-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP15]], [[TMP16]]
+; X32-NEXT: br i1 [[TMP17]], label [[LOADBB2]], label [[RES_BLOCK]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP18:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP20:%.*]] = getelementptr i32, i32* [[TMP18]], i32 2
+; X32-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP19]], i32 2
+; X32-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP20]]
+; X32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]]
+; X32-NEXT: [[TMP24]] = call i32 @llvm.bswap.i32(i32 [[TMP22]])
+; X32-NEXT: [[TMP25]] = call i32 @llvm.bswap.i32(i32 [[TMP23]])
+; X32-NEXT: [[TMP26:%.*]] = icmp eq i32 [[TMP24]], [[TMP25]]
+; X32-NEXT: br i1 [[TMP26]], label [[LOADBB3]], label [[RES_BLOCK]]
+; X32: loadbb3:
+; X32-NEXT: [[TMP27:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP28:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP29:%.*]] = getelementptr i32, i32* [[TMP27]], i32 3
+; X32-NEXT: [[TMP30:%.*]] = getelementptr i32, i32* [[TMP28]], i32 3
+; X32-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP29]]
+; X32-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP30]]
+; X32-NEXT: [[TMP33]] = call i32 @llvm.bswap.i32(i32 [[TMP31]])
+; X32-NEXT: [[TMP34]] = call i32 @llvm.bswap.i32(i32 [[TMP32]])
+; X32-NEXT: [[TMP35:%.*]] = icmp eq i32 [[TMP33]], [[TMP34]]
+; X32-NEXT: br i1 [[TMP35]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X32-NEXT: ret i32 [[PHI_RES]]
+;
+; X64-LABEL: @cmp16(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
+; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]]
+; X64-NEXT: br i1 [[TMP6]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
+; X64: res_block:
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP4]], [[LOADBB:%.*]] ], [ [[TMP15:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP5]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1]] ]
+; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP9:%.*]] = bitcast i8* [[X]] to i64*
+; X64-NEXT: [[TMP10:%.*]] = bitcast i8* [[Y]] to i64*
+; X64-NEXT: [[TMP11:%.*]] = getelementptr i64, i64* [[TMP9]], i64 1
+; X64-NEXT: [[TMP12:%.*]] = getelementptr i64, i64* [[TMP10]], i64 1
+; X64-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP11]]
+; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]]
+; X64-NEXT: [[TMP15]] = call i64 @llvm.bswap.i64(i64 [[TMP13]])
+; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
+; X64-NEXT: [[TMP17:%.*]] = icmp eq i64 [[TMP15]], [[TMP16]]
+; X64-NEXT: br i1 [[TMP17]], label [[ENDBLOCK]], label [[RES_BLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP8]], [[RES_BLOCK]] ]
+; X64-NEXT: ret i32 [[PHI_RES]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
ret i32 %call
@@ -190,8 +1088,25 @@ define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq3(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 3)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: loadbb:
+; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i16*
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; ALL-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP0]]
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = icmp ne i16 [[TMP2]], [[TMP3]]
+; ALL-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; ALL: res_block:
+; ALL-NEXT: br label [[ENDBLOCK:%.*]]
+; ALL: loadbb1:
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[X]], i8 2
+; ALL-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[Y]], i8 2
+; ALL-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP5]]
+; ALL-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
+; ALL-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP7]], [[TMP8]]
+; ALL-NEXT: br i1 [[TMP9]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; ALL: endblock:
+; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; ALL-NEXT: ret i32 [[CONV]]
;
@@ -221,8 +1136,25 @@ define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq5(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 5)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: loadbb:
+; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]]
+; ALL-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; ALL: res_block:
+; ALL-NEXT: br label [[ENDBLOCK:%.*]]
+; ALL: loadbb1:
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[X]], i8 4
+; ALL-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[Y]], i8 4
+; ALL-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP5]]
+; ALL-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
+; ALL-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP7]], [[TMP8]]
+; ALL-NEXT: br i1 [[TMP9]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; ALL: endblock:
+; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; ALL-NEXT: ret i32 [[CONV]]
;
@@ -234,8 +1166,27 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq6(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 6)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: loadbb:
+; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]]
+; ALL-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; ALL: res_block:
+; ALL-NEXT: br label [[ENDBLOCK:%.*]]
+; ALL: loadbb1:
+; ALL-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i16*
+; ALL-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i16*
+; ALL-NEXT: [[TMP7:%.*]] = getelementptr i16, i16* [[TMP5]], i16 2
+; ALL-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
+; ALL-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP7]]
+; ALL-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; ALL-NEXT: [[TMP11:%.*]] = icmp ne i16 [[TMP9]], [[TMP10]]
+; ALL-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; ALL: endblock:
+; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; ALL-NEXT: ret i32 [[CONV]]
;
@@ -247,8 +1198,34 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq7(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 7)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; ALL-NEXT: loadbb:
+; ALL-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; ALL-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]]
+; ALL-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; ALL: res_block:
+; ALL-NEXT: br label [[ENDBLOCK:%.*]]
+; ALL: loadbb1:
+; ALL-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i16*
+; ALL-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i16*
+; ALL-NEXT: [[TMP7:%.*]] = getelementptr i16, i16* [[TMP5]], i16 2
+; ALL-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 2
+; ALL-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP7]]
+; ALL-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; ALL-NEXT: [[TMP11:%.*]] = icmp ne i16 [[TMP9]], [[TMP10]]
+; ALL-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; ALL: loadbb2:
+; ALL-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[X]], i8 6
+; ALL-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[Y]], i8 6
+; ALL-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
+; ALL-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP13]]
+; ALL-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP14]], [[TMP15]]
+; ALL-NEXT: br i1 [[TMP16]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; ALL: endblock:
+; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ]
+; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; ALL-NEXT: ret i32 [[CONV]]
;
@@ -260,8 +1237,27 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq8(
-; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 8)
-; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]]
+; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X32: res_block:
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1
+; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]]
+; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
; X32-NEXT: ret i32 [[CONV]]
;
@@ -283,11 +1279,60 @@ define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq9(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 9)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq9(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]]
+; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X32: res_block:
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1
+; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]]
+; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X32-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X32-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
+; X32-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP13]]
+; X32-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP14]], [[TMP15]]
+; X32-NEXT: br i1 [[TMP16]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ]
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64-LABEL: @cmp_eq9(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]]
+; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64: res_block:
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[X]], i8 8
+; X64-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[Y]], i8 8
+; X64-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP5]]
+; X64-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
+; X64-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP7]], [[TMP8]]
+; X64-NEXT: br i1 [[TMP9]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
%cmp = icmp eq i32 %call, 0
@@ -296,11 +1341,64 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq10(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 10)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq10(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]]
+; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X32: res_block:
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1
+; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]]
+; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i16*
+; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i16*
+; X32-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 4
+; X32-NEXT: [[TMP15:%.*]] = getelementptr i16, i16* [[TMP13]], i16 4
+; X32-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]]
+; X32-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]]
+; X32-NEXT: [[TMP18:%.*]] = icmp ne i16 [[TMP16]], [[TMP17]]
+; X32-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ]
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64-LABEL: @cmp_eq10(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]]
+; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64: res_block:
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i16*
+; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i16*
+; X64-NEXT: [[TMP7:%.*]] = getelementptr i16, i16* [[TMP5]], i16 4
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4
+; X64-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP7]]
+; X64-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; X64-NEXT: [[TMP11:%.*]] = icmp ne i16 [[TMP9]], [[TMP10]]
+; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
%cmp = icmp eq i32 %call, 0
@@ -309,11 +1407,78 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq11(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 11)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq11(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]]
+; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X32: res_block:
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1
+; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]]
+; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i16*
+; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i16*
+; X32-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 4
+; X32-NEXT: [[TMP15:%.*]] = getelementptr i16, i16* [[TMP13]], i16 4
+; X32-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]]
+; X32-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]]
+; X32-NEXT: [[TMP18:%.*]] = icmp ne i16 [[TMP16]], [[TMP17]]
+; X32-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[LOADBB3:%.*]]
+; X32: loadbb3:
+; X32-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[X]], i8 10
+; X32-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[Y]], i8 10
+; X32-NEXT: [[TMP21:%.*]] = load i8, i8* [[TMP19]]
+; X32-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP20]]
+; X32-NEXT: [[TMP23:%.*]] = icmp ne i8 [[TMP21]], [[TMP22]]
+; X32-NEXT: br i1 [[TMP23]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ 1, [[RES_BLOCK]] ]
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64-LABEL: @cmp_eq11(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]]
+; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64: res_block:
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i16*
+; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i16*
+; X64-NEXT: [[TMP7:%.*]] = getelementptr i16, i16* [[TMP5]], i16 4
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP6]], i16 4
+; X64-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP7]]
+; X64-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
+; X64-NEXT: [[TMP11:%.*]] = icmp ne i16 [[TMP9]], [[TMP10]]
+; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[X]], i8 10
+; X64-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[Y]], i8 10
+; X64-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
+; X64-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP13]]
+; X64-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP14]], [[TMP15]]
+; X64-NEXT: br i1 [[TMP16]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
%cmp = icmp eq i32 %call, 0
@@ -322,11 +1487,64 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq12(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 12)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq12(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]]
+; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X32: res_block:
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1
+; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]]
+; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[TMP12]], i32 2
+; X32-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[TMP13]], i32 2
+; X32-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP14]]
+; X32-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP15]]
+; X32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP16]], [[TMP17]]
+; X32-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ]
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64-LABEL: @cmp_eq12(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]]
+; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64: res_block:
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32*
+; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32*
+; X64-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 2
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
+; X64-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]]
+; X64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]]
+; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
%cmp = icmp eq i32 %call, 0
@@ -335,11 +1553,78 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq13(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 13)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq13(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]]
+; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X32: res_block:
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1
+; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]]
+; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[TMP12]], i32 2
+; X32-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[TMP13]], i32 2
+; X32-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP14]]
+; X32-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP15]]
+; X32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP16]], [[TMP17]]
+; X32-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[LOADBB3:%.*]]
+; X32: loadbb3:
+; X32-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[X]], i8 12
+; X32-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[Y]], i8 12
+; X32-NEXT: [[TMP21:%.*]] = load i8, i8* [[TMP19]]
+; X32-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP20]]
+; X32-NEXT: [[TMP23:%.*]] = icmp ne i8 [[TMP21]], [[TMP22]]
+; X32-NEXT: br i1 [[TMP23]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ 1, [[RES_BLOCK]] ]
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64-LABEL: @cmp_eq13(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]]
+; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64: res_block:
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32*
+; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32*
+; X64-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 2
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
+; X64-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]]
+; X64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]]
+; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[X]], i8 12
+; X64-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[Y]], i8 12
+; X64-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP12]]
+; X64-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP13]]
+; X64-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP14]], [[TMP15]]
+; X64-NEXT: br i1 [[TMP16]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
%cmp = icmp eq i32 %call, 0
@@ -348,11 +1633,82 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq14(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 14)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq14(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]]
+; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X32: res_block:
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1
+; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]]
+; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[TMP12]], i32 2
+; X32-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[TMP13]], i32 2
+; X32-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP14]]
+; X32-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP15]]
+; X32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP16]], [[TMP17]]
+; X32-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[LOADBB3:%.*]]
+; X32: loadbb3:
+; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[X]] to i16*
+; X32-NEXT: [[TMP20:%.*]] = bitcast i8* [[Y]] to i16*
+; X32-NEXT: [[TMP21:%.*]] = getelementptr i16, i16* [[TMP19]], i16 6
+; X32-NEXT: [[TMP22:%.*]] = getelementptr i16, i16* [[TMP20]], i16 6
+; X32-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP21]]
+; X32-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP22]]
+; X32-NEXT: [[TMP25:%.*]] = icmp ne i16 [[TMP23]], [[TMP24]]
+; X32-NEXT: br i1 [[TMP25]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ 1, [[RES_BLOCK]] ]
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64-LABEL: @cmp_eq14(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]]
+; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64: res_block:
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32*
+; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32*
+; X64-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 2
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
+; X64-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]]
+; X64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]]
+; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i16*
+; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i16*
+; X64-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 6
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i16, i16* [[TMP13]], i16 6
+; X64-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]]
+; X64-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]]
+; X64-NEXT: [[TMP18:%.*]] = icmp ne i16 [[TMP16]], [[TMP17]]
+; X64-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB2]] ], [ 1, [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
%cmp = icmp eq i32 %call, 0
@@ -361,11 +1717,52 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq15(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq15(
+; X32-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 15)
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64-LABEL: @cmp_eq15(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]]
+; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64: res_block:
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32*
+; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32*
+; X64-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 2
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 2
+; X64-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]]
+; X64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]]
+; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; X64: loadbb2:
+; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i16*
+; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i16*
+; X64-NEXT: [[TMP14:%.*]] = getelementptr i16, i16* [[TMP12]], i16 6
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i16, i16* [[TMP13]], i16 6
+; X64-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP14]]
+; X64-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]]
+; X64-NEXT: [[TMP18:%.*]] = icmp ne i16 [[TMP16]], [[TMP17]]
+; X64-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[LOADBB3:%.*]]
+; X64: loadbb3:
+; X64-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[X]], i8 14
+; X64-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[Y]], i8 14
+; X64-NEXT: [[TMP21:%.*]] = load i8, i8* [[TMP19]]
+; X64-NEXT: [[TMP22:%.*]] = load i8, i8* [[TMP20]]
+; X64-NEXT: [[TMP23:%.*]] = icmp ne i8 [[TMP21]], [[TMP22]]
+; X64-NEXT: br i1 [[TMP23]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ 1, [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
%cmp = icmp eq i32 %call, 0
@@ -374,11 +1771,73 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
}
define i32 @cmp_eq16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
-; ALL-LABEL: @cmp_eq16(
-; ALL-NEXT: [[CALL:%.*]] = tail call i32 @memcmp(i8* [[X:%.*]], i8* [[Y:%.*]], i64 16)
-; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
-; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
-; ALL-NEXT: ret i32 [[CONV]]
+; X32-LABEL: @cmp_eq16(
+; X32-NEXT: loadbb:
+; X32-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]]
+; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP2]], [[TMP3]]
+; X32-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X32: res_block:
+; X32-NEXT: br label [[ENDBLOCK:%.*]]
+; X32: loadbb1:
+; X32-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i32, i32* [[TMP5]], i32 1
+; X32-NEXT: [[TMP8:%.*]] = getelementptr i32, i32* [[TMP6]], i32 1
+; X32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
+; X32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], [[TMP10]]
+; X32-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[LOADBB2:%.*]]
+; X32: loadbb2:
+; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP14:%.*]] = getelementptr i32, i32* [[TMP12]], i32 2
+; X32-NEXT: [[TMP15:%.*]] = getelementptr i32, i32* [[TMP13]], i32 2
+; X32-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP14]]
+; X32-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP15]]
+; X32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP16]], [[TMP17]]
+; X32-NEXT: br i1 [[TMP18]], label [[RES_BLOCK]], label [[LOADBB3:%.*]]
+; X32: loadbb3:
+; X32-NEXT: [[TMP19:%.*]] = bitcast i8* [[X]] to i32*
+; X32-NEXT: [[TMP20:%.*]] = bitcast i8* [[Y]] to i32*
+; X32-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP19]], i32 3
+; X32-NEXT: [[TMP22:%.*]] = getelementptr i32, i32* [[TMP20]], i32 3
+; X32-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP21]]
+; X32-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP22]]
+; X32-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP23]], [[TMP24]]
+; X32-NEXT: br i1 [[TMP25]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X32: endblock:
+; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB3]] ], [ 1, [[RES_BLOCK]] ]
+; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X32-NEXT: ret i32 [[CONV]]
+;
+; X64-LABEL: @cmp_eq16(
+; X64-NEXT: loadbb:
+; X64-NEXT: [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP0]]
+; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]]
+; X64-NEXT: br i1 [[TMP4]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
+; X64: res_block:
+; X64-NEXT: br label [[ENDBLOCK:%.*]]
+; X64: loadbb1:
+; X64-NEXT: [[TMP5:%.*]] = bitcast i8* [[X]] to i64*
+; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y]] to i64*
+; X64-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[TMP5]], i64 1
+; X64-NEXT: [[TMP8:%.*]] = getelementptr i64, i64* [[TMP6]], i64 1
+; X64-NEXT: [[TMP9:%.*]] = load i64, i64* [[TMP7]]
+; X64-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
+; X64-NEXT: [[TMP11:%.*]] = icmp ne i64 [[TMP9]], [[TMP10]]
+; X64-NEXT: br i1 [[TMP11]], label [[RES_BLOCK]], label [[ENDBLOCK]]
+; X64: endblock:
+; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
+; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
+; X64-NEXT: ret i32 [[CONV]]
;
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
%cmp = icmp eq i32 %call, 0
diff --git a/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
index b6b7757978263..088b177c2e11a 100644
--- a/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
+++ b/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
@@ -194,7 +194,6 @@ rare.2:
br label %fallthrough
}
-
declare void @slowpath(i32, i32*)
; Make sure we don't end up in an infinite loop after we fail to sink.
@@ -218,3 +217,37 @@ load.i145:
pl_loop.i.i122:
br label %pl_loop.i.i122
}
+
+; Make sure we can sink address computation even
+; if there is a cycle in phi nodes.
+define void @test9(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test9
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br label %header
+
+header:
+ %iv = phi i32 [0, %entry], [%iv.inc, %backedge]
+ %casted.loop = phi i32* [%casted, %entry], [%casted.merged, %backedge]
+ br i1 %cond, label %if.then, label %backedge
+
+if.then:
+ call void @foo(i32 %iv)
+ %addr.1 = getelementptr inbounds i64, i64* %base, i64 5
+ %casted.1 = bitcast i64* %addr.1 to i32*
+ br label %backedge
+
+backedge:
+; CHECK-LABEL: backedge:
+; CHECK: getelementptr i8, {{.+}} 40
+ %casted.merged = phi i32* [%casted.loop, %header], [%casted.1, %if.then]
+ %v = load i32, i32* %casted.merged, align 4
+ call void @foo(i32 %v)
+ %iv.inc = add i32 %iv, 1
+ %cmp = icmp slt i32 %iv.inc, 1000
+ br i1 %cmp, label %header, label %exit
+
+exit:
+ ret void
+}
diff --git a/test/Transforms/EarlyCSE/globalsaa-memoryssa.ll b/test/Transforms/EarlyCSE/globalsaa-memoryssa.ll
new file mode 100644
index 0000000000000..57dbdd8831902
--- /dev/null
+++ b/test/Transforms/EarlyCSE/globalsaa-memoryssa.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -globals-aa -early-cse-memssa | FileCheck %s
+
+define i16 @f1() readonly {
+ ret i16 0
+}
+
+declare void @f2()
+
+; Check that EarlyCSE correctly handles function calls that don't have
+; a MemoryAccess. In this case the calls to @f1 have no
+; MemoryAccesses since globals-aa determines that @f1 doesn't
+; read/write memory at all.
+
+define void @f3() {
+; CHECK-LABEL: @f3(
+; CHECK-NEXT: [[CALL1:%.*]] = call i16 @f1()
+; CHECK-NEXT: call void @f2()
+; CHECK-NEXT: ret void
+;
+ %call1 = call i16 @f1()
+ call void @f2()
+ %call2 = call i16 @f1()
+ ret void
+}
diff --git a/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll b/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll
new file mode 100644
index 0000000000000..513379d0bd017
--- /dev/null
+++ b/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll
@@ -0,0 +1,79 @@
+; This test checks if debug loc is propagated to load/store created by GVN/Instcombine.
+; RUN: opt < %s -gvn -S | FileCheck %s --check-prefixes=ALL,GVN
+; RUN: opt < %s -gvn -instcombine -S | FileCheck %s --check-prefixes=ALL,INSTCOMBINE
+
+; struct node {
+; int *v;
+; struct desc *descs;
+; };
+
+; struct desc {
+; struct node *node;
+; };
+
+; extern int bar(void *v, void* n);
+
+; int test(struct desc *desc)
+; {
+; void *v, *n;
+; v = !desc ? ((void *)0) : desc->node->v; // Line 15
+; n = &desc->node->descs[0]; // Line 16
+; return bar(v, n);
+; }
+
+; Line 16, Column 13:
+; n = &desc->node->descs[0];
+; ^
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%struct.desc = type { %struct.node* }
+%struct.node = type { i32*, %struct.desc* }
+
+define i32 @test(%struct.desc* readonly %desc) local_unnamed_addr #0 !dbg !4 {
+entry:
+ %tobool = icmp eq %struct.desc* %desc, null
+ br i1 %tobool, label %cond.end, label %cond.false, !dbg !9
+; ALL: br i1 %tobool, label %entry.cond.end_crit_edge, label %cond.false, !dbg [[LOC_15_6:![0-9]+]]
+; ALL: entry.cond.end_crit_edge:
+; GVN: %.pre = load %struct.node*, %struct.node** null, align 8, !dbg [[LOC_16_13:![0-9]+]]
+; INSTCOMBINE:store %struct.node* undef, %struct.node** null, align 536870912, !dbg [[LOC_16_13:![0-9]+]]
+
+cond.false:
+ %0 = bitcast %struct.desc* %desc to i8***, !dbg !11
+ %1 = load i8**, i8*** %0, align 8, !dbg !11
+ %2 = load i8*, i8** %1, align 8
+ br label %cond.end, !dbg !9
+
+cond.end:
+ %3 = phi i8* [ %2, %cond.false ], [ null, %entry ], !dbg !9
+ %node2 = getelementptr inbounds %struct.desc, %struct.desc* %desc, i64 0, i32 0
+ %4 = load %struct.node*, %struct.node** %node2, align 8, !dbg !10
+ %descs = getelementptr inbounds %struct.node, %struct.node* %4, i64 0, i32 1
+ %5 = bitcast %struct.desc** %descs to i8**
+ %6 = load i8*, i8** %5, align 8
+ %call = tail call i32 @bar(i8* %3, i8* %6)
+ ret i32 %call
+}
+
+declare i32 @bar(i8*, i8*) local_unnamed_addr #1
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug)
+!1 = !DIFile(filename: "test.c", directory: ".")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 12, type: !5, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !8)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!8 = !{}
+!9 = !DILocation(line: 15, column: 6, scope: !4)
+!10 = !DILocation(line: 16, column: 13, scope: !4)
+!11 = !DILocation(line: 15, column: 34, scope: !4)
+
+;ALL: [[SCOPE:![0-9]+]] = distinct !DISubprogram(name: "test",{{.*}}
+;ALL: [[LOC_15_6]] = !DILocation(line: 15, column: 6, scope: [[SCOPE]])
+;ALL: [[LOC_16_13]] = !DILocation(line: 16, column: 13, scope: [[SCOPE]])
diff --git a/test/Transforms/GVN/PRE/phi-translate.ll b/test/Transforms/GVN/PRE/phi-translate.ll
index 1f6c7c8d33ea7..55f5fd6465b68 100644
--- a/test/Transforms/GVN/PRE/phi-translate.ll
+++ b/test/Transforms/GVN/PRE/phi-translate.ll
@@ -6,12 +6,12 @@ target datalayout = "e-p:64:64:64"
; CHECK: entry.end_crit_edge:
; CHECK: %[[INDEX:[a-z0-9.]+]] = sext i32 %x to i64{{$}}
; CHECK: %[[ADDRESS:[a-z0-9.]+]] = getelementptr [100 x i32], [100 x i32]* @G, i64 0, i64 %[[INDEX]]{{$}}
-; CHECK: %n.pre = load i32, i32* %[[ADDRESS]]{{$}}
+; CHECK: %n.pre = load i32, i32* %[[ADDRESS]], !dbg [[N_LOC:![0-9]+]]
; CHECK: br label %end
; CHECK: then:
; CHECK: store i32 %z
; CHECK: end:
-; CHECK: %n = phi i32 [ %n.pre, %entry.end_crit_edge ], [ %z, %then ], !dbg [[N_LOC:![0-9]+]]
+; CHECK: %n = phi i32 [ %n.pre, %entry.end_crit_edge ], [ %z, %then ], !dbg [[N_LOC]]
; CHECK: ret i32 %n
; CHECK: [[N_LOC]] = !DILocation(line: 47, column: 1, scope: !{{.*}})
diff --git a/test/Transforms/GlobalOpt/pr33686.ll b/test/Transforms/GlobalOpt/pr33686.ll
new file mode 100644
index 0000000000000..d6bb98735f4e8
--- /dev/null
+++ b/test/Transforms/GlobalOpt/pr33686.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -globalopt %s | FileCheck %s
+
+@glob = external global i16, align 1
+
+define void @beth() {
+; CHECK-LABEL: @beth(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret void
+;
+entry:
+ ret void
+
+notreachable:
+ %patatino = select i1 undef, i16* @glob, i16* %patatino
+ br label %notreachable
+}
diff --git a/test/Transforms/IRCE/eq_ne.ll b/test/Transforms/IRCE/eq_ne.ll
new file mode 100644
index 0000000000000..1b1ffe6b94ba7
--- /dev/null
+++ b/test/Transforms/IRCE/eq_ne.ll
@@ -0,0 +1,257 @@
+; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s
+
+; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK-NOT: irce: in function test_02: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK: irce: in function test_03: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK-NOT: irce: in function test_04: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK: irce: in function test_05: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK-NOT: irce: in function test_06: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK: irce: in function test_07: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK-NOT: irce: in function test_08: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+
+; Show that IRCE can turn 'ne' condition to 'slt' in increasing IV.
+define void @test_01(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK: test_01
+; CHECK: main.exit.selector:
+; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ]
+; CHECK-NEXT: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], 100
+; CHECK-NEXT: br i1 [[COND]]
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp ne i32 %idx.next, 100
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Show that if n is not known to be greater than the starting value, IRCE
+; doesn't apply.
+define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK: test_02(
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp ne i32 %idx.next, -100
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Show that IRCE can turn 'eq' condition to 'sge' in increasing IV.
+define void @test_03(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK: test_03(
+; CHECK: main.exit.selector:
+; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ]
+; CHECK-NEXT: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], 100
+; CHECK-NEXT: br i1 [[COND]]
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp eq i32 %idx.next, 100
+ br i1 %next, label %exit, label %loop
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Show that if n is not known to be greater than the starting value, IRCE
+; doesn't apply.
+define void @test_04(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK: test_04(
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp eq i32 %idx.next, -100
+ br i1 %next, label %exit, label %loop
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Show that IRCE can turn 'ne' condition to 'sgt' in decreasing IV.
+define void @test_05(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK: test_05(
+; CHECK: preloop.exit.selector:
+; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next.preloop, %in.bounds.preloop ]
+; CHECK-NEXT: [[COND:%[^ ]+]] = icmp sgt i32 [[PSEUDO_PHI]], 0
+; CHECK-NEXT: br i1 [[COND]]
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, -1
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp ne i32 %idx.next, 0
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Show that IRCE cannot turn 'ne' condition to 'sgt' in decreasing IV if the end
+; value is not proved to be less than the start value.
+define void @test_06(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK: test_06(
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, -1
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp ne i32 %idx.next, 120
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Show that IRCE can turn 'eq' condition to 'slt' in decreasing IV.
+define void @test_07(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK: test_07(
+; CHECK: preloop.exit.selector:
+; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next.preloop, %in.bounds.preloop ]
+; CHECK-NEXT: [[COND:%[^ ]+]] = icmp sgt i32 [[PSEUDO_PHI]], 0
+; CHECK-NEXT: br i1 [[COND]]
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, -1
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp eq i32 %idx.next, 0
+ br i1 %next, label %exit, label %loop
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Show that IRCE cannot turn 'eq' condition to 'slt' in decreasing IV if the end
+; value is not proved to be less than the start value.
+define void @test_08(i32* %arr, i32* %a_len_ptr) #0 {
+
+; CHECK: test_08(
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, -1
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp eq i32 %idx.next, 120
+ br i1 %next, label %exit, label %loop
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+!0 = !{i32 0, i32 50}
+!1 = !{!"branch_weights", i32 64, i32 4}
diff --git a/test/Transforms/IRCE/pre_post_loops.ll b/test/Transforms/IRCE/pre_post_loops.ll
new file mode 100644
index 0000000000000..2cd2e29104fe9
--- /dev/null
+++ b/test/Transforms/IRCE/pre_post_loops.ll
@@ -0,0 +1,117 @@
+; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s
+
+; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+; CHECK: irce: in function test_02: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+
+; Iterate from 0 to SINT_MAX, check that the post-loop is generated.
+define void @test_01(i32* %arr, i32* %a_len_ptr) {
+
+; CHECK: test_01(
+; CHECK: entry:
+; CHECK-NEXT: %exit.mainloop.at = load i32, i32* %a_len_ptr
+; CHECK: loop:
+; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ]
+; CHECK-NEXT: %idx.next = add i32 %idx, 1
+; CHECK-NEXT: %abc = icmp slt i32 %idx, %exit.mainloop.at
+; CHECK-NEXT: br i1 true, label %in.bounds,
+; CHECK: in.bounds:
+; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
+; CHECK-NEXT: store i32 0, i32* %addr
+; CHECK-NEXT: %next = icmp slt i32 %idx.next, 2147483647
+; CHECK-NEXT: [[COND:%[^ ]+]] = icmp slt i32 %idx.next, %exit.mainloop.at
+; CHECK-NEXT: br i1 [[COND]], label %loop, label %main.exit.selector
+; CHECK: main.pseudo.exit:
+; CHECK-NEXT: %idx.copy = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
+; CHECK-NEXT: %indvar.end = phi i32 [ 0, %entry ], [ %idx.next.lcssa, %main.exit.selector ]
+; CHECK-NEXT: br label %postloop
+; CHECK: postloop:
+; CHECK-NEXT: br label %loop.postloop
+; CHECK: loop.postloop:
+; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ]
+; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 1
+; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %exit.mainloop.at
+; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit
+; CHECK: in.bounds.postloop:
+; CHECK-NEXT: %addr.postloop = getelementptr i32, i32* %arr, i32 %idx.postloop
+; CHECK-NEXT: store i32 0, i32* %addr.postloop
+; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.next.postloop, 2147483647
+; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp slt i32 %idx.next, 2147483647
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+; Iterate from SINT_MAX to 0, check that the pre-loop is generated.
+define void @test_02(i32* %arr, i32* %a_len_ptr) {
+
+; CHECK: test_02(
+; CHECK: entry:
+; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0
+; CHECK-NEXT:  br i1 true, label %loop.preloop.preheader
+; CHECK: mainloop:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: %idx = phi i32 [ %idx.preloop.copy, %mainloop ], [ %idx.next, %in.bounds ]
+; CHECK-NEXT: %idx.next = add i32 %idx, -1
+; CHECK-NEXT: %abc = icmp slt i32 %idx, %len
+; CHECK-NEXT: br i1 true, label %in.bounds
+; CHECK: in.bounds:
+; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx
+; CHECK-NEXT: store i32 0, i32* %addr
+; CHECK-NEXT: %next = icmp sgt i32 %idx.next, -1
+; CHECK-NEXT: br i1 %next, label %loop, label %exit.loopexit
+; CHECK: loop.preloop:
+; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 2147483647, %loop.preloop.preheader ]
+; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, -1
+; CHECK-NEXT: %abc.preloop = icmp slt i32 %idx.preloop, %len
+; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit
+; CHECK: in.bounds.preloop:
+; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+; CHECK-NEXT: store i32 0, i32* %addr.preloop
+; CHECK-NEXT: %next.preloop = icmp sgt i32 %idx.next.preloop, -1
+; CHECK-NEXT: [[COND:%[^ ]+]] = icmp sgt i32 %idx.next.preloop, -1
+; CHECK-NEXT: br i1 [[COND]], label %loop.preloop, label %preloop.exit.selector
+
+entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 2147483647, %entry ], [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, -1
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp sgt i32 %idx.next, -1
+ br i1 %next, label %loop, label %exit
+
+out.of.bounds:
+ ret void
+
+exit:
+ ret void
+}
+
+!0 = !{i32 0, i32 50}
diff --git a/test/Transforms/Inline/AArch64/ext.ll b/test/Transforms/Inline/AArch64/ext.ll
new file mode 100644
index 0000000000000..04095c04ee869
--- /dev/null
+++ b/test/Transforms/Inline/AArch64/ext.ll
@@ -0,0 +1,249 @@
+; REQUIRES: asserts
+; RUN: opt -inline -mtriple=aarch64--linux-gnu -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define i32 @outer1(i32* %ptr, i32 %i) {
+ %C = call i32 @inner1(i32* %ptr, i32 %i)
+ ret i32 %C
+}
+
+; sext can be folded into gep.
+; CHECK: Analyzing call of inner1
+; CHECK: NumInstructionsSimplified: 3
+; CHECK: NumInstructions: 4
+define i32 @inner1(i32* %ptr, i32 %i) {
+ %E = sext i32 %i to i64
+ %G = getelementptr inbounds i32, i32* %ptr, i64 %E
+ %L = load i32, i32* %G
+ ret i32 %L
+}
+
+define i32 @outer2(i32* %ptr, i32 %i) {
+ %C = call i32 @inner2(i32* %ptr, i32 %i)
+ ret i32 %C
+}
+
+; zext from i32 to i64 is free.
+; CHECK: Analyzing call of inner2
+; CHECK: NumInstructionsSimplified: 3
+; CHECK: NumInstructions: 4
+define i32 @inner2(i32* %ptr, i32 %i) {
+ %E = zext i32 %i to i64
+ %G = getelementptr inbounds i32, i32* %ptr, i64 %E
+ %L = load i32, i32* %G
+ ret i32 %L
+}
+
+define i32 @outer3(i32* %ptr, i16 %i) {
+ %C = call i32 @inner3(i32* %ptr, i16 %i)
+ ret i32 %C
+}
+
+; zext can be folded into gep.
+; CHECK: Analyzing call of inner3
+; CHECK: NumInstructionsSimplified: 3
+; CHECK: NumInstructions: 4
+define i32 @inner3(i32* %ptr, i16 %i) {
+ %E = zext i16 %i to i64
+ %G = getelementptr inbounds i32, i32* %ptr, i64 %E
+ %L = load i32, i32* %G
+ ret i32 %L
+}
+
+define i16 @outer4(i8* %ptr) {
+ %C = call i16 @inner4(i8* %ptr)
+ ret i16 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner4
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i16 @inner4(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = zext i8 %L to i16
+ ret i16 %E
+}
+
+define i16 @outer5(i8* %ptr) {
+ %C = call i16 @inner5(i8* %ptr)
+ ret i16 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner5
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i16 @inner5(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = sext i8 %L to i16
+ ret i16 %E
+}
+
+define i32 @outer6(i8* %ptr) {
+ %C = call i32 @inner6(i8* %ptr)
+ ret i32 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner6
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i32 @inner6(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = zext i8 %L to i32
+ ret i32 %E
+}
+
+define i32 @outer7(i8* %ptr) {
+ %C = call i32 @inner7(i8* %ptr)
+ ret i32 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner7
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i32 @inner7(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = sext i8 %L to i32
+ ret i32 %E
+}
+
+define i32 @outer8(i16* %ptr) {
+ %C = call i32 @inner8(i16* %ptr)
+ ret i32 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner8
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i32 @inner8(i16* %ptr) {
+ %L = load i16, i16* %ptr
+ %E = zext i16 %L to i32
+ ret i32 %E
+}
+
+define i32 @outer9(i16* %ptr) {
+ %C = call i32 @inner9(i16* %ptr)
+ ret i32 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner9
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i32 @inner9(i16* %ptr) {
+ %L = load i16, i16* %ptr
+ %E = sext i16 %L to i32
+ ret i32 %E
+}
+
+define i64 @outer10(i8* %ptr) {
+ %C = call i64 @inner10(i8* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner10
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner10(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = zext i8 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer11(i8* %ptr) {
+ %C = call i64 @inner11(i8* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner11
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner11(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = sext i8 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer12(i16* %ptr) {
+ %C = call i64 @inner12(i16* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner12
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner12(i16* %ptr) {
+ %L = load i16, i16* %ptr
+ %E = zext i16 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer13(i16* %ptr) {
+ %C = call i64 @inner13(i16* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner13
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner13(i16* %ptr) {
+ %L = load i16, i16* %ptr
+ %E = sext i16 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer14(i32* %ptr) {
+ %C = call i64 @inner14(i32* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner14
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner14(i32* %ptr) {
+ %L = load i32, i32* %ptr
+ %E = zext i32 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer15(i32* %ptr) {
+ %C = call i64 @inner15(i32* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner15
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner15(i32* %ptr) {
+ %L = load i32, i32* %ptr
+ %E = sext i32 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer16(i32 %V1, i64 %V2) {
+ %C = call i64 @inner16(i32 %V1, i64 %V2)
+ ret i64 %C
+}
+
+; sext can be folded into shl.
+; CHECK: Analyzing call of inner16
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 4
+define i64 @inner16(i32 %V1, i64 %V2) {
+ %E = sext i32 %V1 to i64
+ %S = shl i64 %E, 3
+ %A = add i64 %V2, %S
+ ret i64 %A
+}
diff --git a/test/Transforms/Inline/PowerPC/ext.ll b/test/Transforms/Inline/PowerPC/ext.ll
new file mode 100644
index 0000000000000..f7a409467b2c0
--- /dev/null
+++ b/test/Transforms/Inline/PowerPC/ext.ll
@@ -0,0 +1,140 @@
+; REQUIRES: asserts
+; RUN: opt -inline -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i16 @outer1(i8* %ptr) {
+ %C = call i16 @inner1(i8* %ptr)
+ ret i16 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner1
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i16 @inner1(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = zext i8 %L to i16
+ ret i16 %E
+}
+
+define i32 @outer2(i8* %ptr) {
+ %C = call i32 @inner2(i8* %ptr)
+ ret i32 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner2
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i32 @inner2(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = zext i8 %L to i32
+ ret i32 %E
+}
+
+define i32 @outer3(i16* %ptr) {
+ %C = call i32 @inner3(i16* %ptr)
+ ret i32 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner3
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i32 @inner3(i16* %ptr) {
+ %L = load i16, i16* %ptr
+ %E = zext i16 %L to i32
+ ret i32 %E
+}
+
+define i32 @outer4(i16* %ptr) {
+ %C = call i32 @inner4(i16* %ptr)
+ ret i32 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner4
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i32 @inner4(i16* %ptr) {
+ %L = load i16, i16* %ptr
+ %E = sext i16 %L to i32
+ ret i32 %E
+}
+
+define i64 @outer5(i8* %ptr) {
+ %C = call i64 @inner5(i8* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner5
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner5(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = zext i8 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer6(i16* %ptr) {
+ %C = call i64 @inner6(i16* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner6
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner6(i16* %ptr) {
+ %L = load i16, i16* %ptr
+ %E = zext i16 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer7(i16* %ptr) {
+ %C = call i64 @inner7(i16* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner7
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner7(i16* %ptr) {
+ %L = load i16, i16* %ptr
+ %E = sext i16 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer8(i32* %ptr) {
+ %C = call i64 @inner8(i32* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner8
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner8(i32* %ptr) {
+ %L = load i32, i32* %ptr
+ %E = zext i32 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer9(i32* %ptr) {
+ %C = call i64 @inner9(i32* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner9
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner9(i32* %ptr) {
+ %L = load i32, i32* %ptr
+ %E = sext i32 %L to i64
+ ret i64 %E
+}
diff --git a/test/Transforms/Inline/PowerPC/lit.local.cfg b/test/Transforms/Inline/PowerPC/lit.local.cfg
new file mode 100644
index 0000000000000..5d33887ff0a48
--- /dev/null
+++ b/test/Transforms/Inline/PowerPC/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'PowerPC' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/Inline/X86/ext.ll b/test/Transforms/Inline/X86/ext.ll
new file mode 100644
index 0000000000000..bffda38527998
--- /dev/null
+++ b/test/Transforms/Inline/X86/ext.ll
@@ -0,0 +1,201 @@
+; REQUIRES: asserts
+; RUN: opt -inline -mtriple=x86_64-unknown-unknown -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+define i32 @outer1(i32* %ptr, i32 %i) {
+ %C = call i32 @inner1(i32* %ptr, i32 %i)
+ ret i32 %C
+}
+
+; zext from i32 to i64 is free.
+; CHECK: Analyzing call of inner1
+; CHECK: NumInstructionsSimplified: 3
+; CHECK: NumInstructions: 4
+define i32 @inner1(i32* %ptr, i32 %i) {
+ %E = zext i32 %i to i64
+ %G = getelementptr inbounds i32, i32* %ptr, i64 %E
+ %L = load i32, i32* %G
+ ret i32 %L
+}
+
+define i16 @outer2(i8* %ptr) {
+ %C = call i16 @inner2(i8* %ptr)
+ ret i16 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner2
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i16 @inner2(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = zext i8 %L to i16
+ ret i16 %E
+}
+
+define i16 @outer3(i8* %ptr) {
+ %C = call i16 @inner3(i8* %ptr)
+ ret i16 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner3
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i16 @inner3(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = sext i8 %L to i16
+ ret i16 %E
+}
+
+define i32 @outer4(i8* %ptr) {
+ %C = call i32 @inner4(i8* %ptr)
+ ret i32 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner4
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i32 @inner4(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = zext i8 %L to i32
+ ret i32 %E
+}
+
+define i32 @outer5(i8* %ptr) {
+ %C = call i32 @inner5(i8* %ptr)
+ ret i32 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner5
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i32 @inner5(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = sext i8 %L to i32
+ ret i32 %E
+}
+
+define i32 @outer6(i16* %ptr) {
+ %C = call i32 @inner6(i16* %ptr)
+ ret i32 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner6
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i32 @inner6(i16* %ptr) {
+ %L = load i16, i16* %ptr
+ %E = zext i16 %L to i32
+ ret i32 %E
+}
+
+define i32 @outer7(i16* %ptr) {
+ %C = call i32 @inner7(i16* %ptr)
+ ret i32 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner7
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i32 @inner7(i16* %ptr) {
+ %L = load i16, i16* %ptr
+ %E = sext i16 %L to i32
+ ret i32 %E
+}
+
+define i64 @outer8(i8* %ptr) {
+ %C = call i64 @inner8(i8* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner8
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner8(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = zext i8 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer9(i8* %ptr) {
+ %C = call i64 @inner9(i8* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner9
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner9(i8* %ptr) {
+ %L = load i8, i8* %ptr
+ %E = sext i8 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer10(i16* %ptr) {
+ %C = call i64 @inner10(i16* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner10
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner10(i16* %ptr) {
+ %L = load i16, i16* %ptr
+ %E = zext i16 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer11(i16* %ptr) {
+ %C = call i64 @inner11(i16* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner11
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner11(i16* %ptr) {
+ %L = load i16, i16* %ptr
+ %E = sext i16 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer12(i32* %ptr) {
+ %C = call i64 @inner12(i32* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner12
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner12(i32* %ptr) {
+ %L = load i32, i32* %ptr
+ %E = zext i32 %L to i64
+ ret i64 %E
+}
+
+define i64 @outer13(i32* %ptr) {
+ %C = call i64 @inner13(i32* %ptr)
+ ret i64 %C
+}
+
+; It is an ExtLoad.
+; CHECK: Analyzing call of inner13
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 3
+define i64 @inner13(i32* %ptr) {
+ %L = load i32, i32* %ptr
+ %E = sext i32 %L to i64
+ ret i64 %E
+}
diff --git a/test/Transforms/InstCombine/2017-07-07-UMul-ZExt.ll b/test/Transforms/InstCombine/2017-07-07-UMul-ZExt.ll
index 3c4e08b5b515c..9053578175094 100644
--- a/test/Transforms/InstCombine/2017-07-07-UMul-ZExt.ll
+++ b/test/Transforms/InstCombine/2017-07-07-UMul-ZExt.ll
@@ -1,7 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
-; CHECK: llvm.umul.with.overflow
define i32 @sterix(i32, i8, i64) {
+; CHECK-LABEL: @sterix(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[TMP0:%.*]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1:%.*]] to i32
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[CONV1]], 1945964878
+; CHECK-NEXT: [[SH_PROM:%.*]] = trunc i64 [[TMP2:%.*]] to i32
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], [[SH_PROM]]
+; CHECK-NEXT: [[CONV2:%.*]] = zext i32 [[SHR]] to i64
+; CHECK-NEXT: [[MUL3:%.*]] = mul nuw nsw i64 [[CONV]], [[CONV2]]
+; CHECK-NEXT: [[CONV6:%.*]] = and i64 [[MUL3]], 4294967295
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[CONV6]], [[MUL3]]
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[LOR_RHS:%.*]], label [[LOR_END:%.*]]
+; CHECK: lor.rhs:
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[MUL3]], [[TMP2]]
+; CHECK-NEXT: [[CONV4:%.*]] = trunc i64 [[AND]] to i32
+; CHECK-NEXT: [[TOBOOL7:%.*]] = icmp eq i32 [[CONV4]], 0
+; CHECK-NEXT: [[PHITMP:%.*]] = zext i1 [[TOBOOL7]] to i32
+; CHECK-NEXT: br label [[LOR_END]]
+; CHECK: lor.end:
+; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[PHITMP]], [[LOR_RHS]] ]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
entry:
%conv = zext i32 %0 to i64
%conv1 = sext i8 %1 to i32
diff --git a/test/Transforms/InstCombine/and-not-or.ll b/test/Transforms/InstCombine/and-not-or.ll
deleted file mode 100644
index a42140be28052..0000000000000
--- a/test/Transforms/InstCombine/and-not-or.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep "and i32 %x, %y" | count 4
-; RUN: opt < %s -instcombine -S | not grep "or"
-
-define i32 @func1(i32 %x, i32 %y) nounwind {
-entry:
- %n = xor i32 %y, -1
- %o = or i32 %n, %x
- %a = and i32 %o, %y
- ret i32 %a
-}
-
-define i32 @func2(i32 %x, i32 %y) nounwind {
-entry:
- %n = xor i32 %y, -1
- %o = or i32 %x, %n
- %a = and i32 %o, %y
- ret i32 %a
-}
-
-define i32 @func3(i32 %x, i32 %y) nounwind {
-entry:
- %n = xor i32 %y, -1
- %o = or i32 %n, %x
- %a = and i32 %y, %o
- ret i32 %a
-}
-
-define i32 @func4(i32 %x, i32 %y) nounwind {
-entry:
- %n = xor i32 %y, -1
- %o = or i32 %x, %n
- %a = and i32 %y, %o
- ret i32 %a
-}
diff --git a/test/Transforms/InstCombine/and.ll b/test/Transforms/InstCombine/and.ll
index 7bb9b95b31797..c12662d4db0e1 100644
--- a/test/Transforms/InstCombine/and.ll
+++ b/test/Transforms/InstCombine/and.ll
@@ -628,3 +628,195 @@ define i32 @test43(i32 %a, i32 %c, i32 %d) {
%and = and i32 %or, %xor
ret i32 %and
}
+
+; (~y | x) & y -> x & y
+define i32 @test44(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %n = xor i32 %y, -1
+ %o = or i32 %n, %x
+ %a = and i32 %o, %y
+ ret i32 %a
+}
+
+; (x | ~y) & y -> x & y
+define i32 @test45(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %n = xor i32 %y, -1
+ %o = or i32 %x, %n
+ %a = and i32 %o, %y
+ ret i32 %a
+}
+
+; y & (~y | x) -> y | x
+define i32 @test46(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %n = xor i32 %y, -1
+ %o = or i32 %n, %x
+ %a = and i32 %y, %o
+ ret i32 %a
+}
+
+; y & (x | ~y) -> y | x
+define i32 @test47(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %n = xor i32 %y, -1
+ %o = or i32 %x, %n
+ %a = and i32 %y, %o
+ ret i32 %a
+}
+
+; In the next 4 tests, vary the types and predicates for extra coverage.
+; (X & (Y | ~X)) -> (X & Y), where 'not' is an inverted cmp
+
+define i1 @and_orn_cmp_1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @and_orn_cmp_1(
+; CHECK-NEXT: [[X:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[X]], [[Y]]
+; CHECK-NEXT: ret i1 [[AND]]
+;
+ %x = icmp sgt i32 %a, %b
+ %x_inv = icmp sle i32 %a, %b
+ %y = icmp ugt i32 %c, 42 ; thwart complexity-based ordering
+ %or = or i1 %y, %x_inv
+ %and = and i1 %x, %or
+ ret i1 %and
+}
+
+; Commute the 'and':
+; ((Y | ~X) & X) -> (X & Y), where 'not' is an inverted cmp
+
+define <2 x i1> @and_orn_cmp_2(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: @and_orn_cmp_2(
+; CHECK-NEXT: [[X:%.*]] = icmp sge <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt <2 x i32> [[C:%.*]], <i32 42, i32 47>
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[Y]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[AND]]
+;
+ %x = icmp sge <2 x i32> %a, %b
+ %x_inv = icmp slt <2 x i32> %a, %b
+ %y = icmp ugt <2 x i32> %c, <i32 42, i32 47> ; thwart complexity-based ordering
+ %or = or <2 x i1> %y, %x_inv
+ %and = and <2 x i1> %or, %x
+ ret <2 x i1> %and
+}
+
+; Commute the 'or':
+; (X & (~X | Y)) -> (X & Y), where 'not' is an inverted cmp
+
+define i1 @and_orn_cmp_3(i72 %a, i72 %b, i72 %c) {
+; CHECK-LABEL: @and_orn_cmp_3(
+; CHECK-NEXT: [[X:%.*]] = icmp ugt i72 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i72 [[C:%.*]], 42
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[X]], [[Y]]
+; CHECK-NEXT: ret i1 [[AND]]
+;
+ %x = icmp ugt i72 %a, %b
+ %x_inv = icmp ule i72 %a, %b
+ %y = icmp ugt i72 %c, 42 ; thwart complexity-based ordering
+ %or = or i1 %x_inv, %y
+ %and = and i1 %x, %or
+ ret i1 %and
+}
+
+; Commute the 'and':
+; ((~X | Y) & X) -> (X & Y), where 'not' is an inverted cmp
+
+define <3 x i1> @or_andn_cmp_4(<3 x i32> %a, <3 x i32> %b, <3 x i32> %c) {
+; CHECK-LABEL: @or_andn_cmp_4(
+; CHECK-NEXT: [[X:%.*]] = icmp eq <3 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt <3 x i32> [[C:%.*]], <i32 42, i32 43, i32 -1>
+; CHECK-NEXT: [[AND:%.*]] = and <3 x i1> [[Y]], [[X]]
+; CHECK-NEXT: ret <3 x i1> [[AND]]
+;
+ %x = icmp eq <3 x i32> %a, %b
+ %x_inv = icmp ne <3 x i32> %a, %b
+ %y = icmp ugt <3 x i32> %c, <i32 42, i32 43, i32 -1> ; thwart complexity-based ordering
+ %or = or <3 x i1> %x_inv, %y
+ %and = and <3 x i1> %or, %x
+ ret <3 x i1> %and
+}
+
+; In the next 4 tests, vary the types and predicates for extra coverage.
+; (~X & (Y | X)) -> (~X & Y), where 'not' is an inverted cmp
+
+define i1 @andn_or_cmp_1(i37 %a, i37 %b, i37 %c) {
+; CHECK-LABEL: @andn_or_cmp_1(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp sle i37 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i37 [[C:%.*]], 42
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[X_INV]], [[Y]]
+; CHECK-NEXT: ret i1 [[AND]]
+;
+ %x = icmp sgt i37 %a, %b
+ %x_inv = icmp sle i37 %a, %b
+ %y = icmp ugt i37 %c, 42 ; thwart complexity-based ordering
+ %or = or i1 %y, %x
+ %and = and i1 %x_inv, %or
+ ret i1 %and
+}
+
+; Commute the 'and':
+; ((Y | X) & ~X) -> (~X & Y), where 'not' is an inverted cmp
+
+define i1 @andn_or_cmp_2(i16 %a, i16 %b, i16 %c) {
+; CHECK-LABEL: @andn_or_cmp_2(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i16 [[C:%.*]], 42
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[Y]], [[X_INV]]
+; CHECK-NEXT: ret i1 [[AND]]
+;
+ %x = icmp sge i16 %a, %b
+ %x_inv = icmp slt i16 %a, %b
+ %y = icmp ugt i16 %c, 42 ; thwart complexity-based ordering
+ %or = or i1 %y, %x
+ %and = and i1 %or, %x_inv
+ ret i1 %and
+}
+
+; Commute the 'or':
+; (~X & (X | Y)) -> (~X & Y), where 'not' is an inverted cmp
+
+define <4 x i1> @andn_or_cmp_3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: @andn_or_cmp_3(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt <4 x i32> [[C:%.*]], <i32 42, i32 0, i32 1, i32 -1>
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i1> [[X_INV]], [[Y]]
+; CHECK-NEXT: ret <4 x i1> [[AND]]
+;
+ %x = icmp ugt <4 x i32> %a, %b
+ %x_inv = icmp ule <4 x i32> %a, %b
+ %y = icmp ugt <4 x i32> %c, <i32 42, i32 0, i32 1, i32 -1> ; thwart complexity-based ordering
+ %or = or <4 x i1> %x, %y
+ %and = and <4 x i1> %x_inv, %or
+ ret <4 x i1> %and
+}
+
+; Commute the 'and':
+; ((X | Y) & ~X) -> (~X & Y), where 'not' is an inverted cmp
+
+define i1 @andn_or_cmp_4(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @andn_or_cmp_4(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[Y]], [[X_INV]]
+; CHECK-NEXT: ret i1 [[AND]]
+;
+ %x = icmp eq i32 %a, %b
+ %x_inv = icmp ne i32 %a, %b
+ %y = icmp ugt i32 %c, 42 ; thwart complexity-based ordering
+ %or = or i1 %x, %y
+ %and = and i1 %or, %x_inv
+ ret i1 %and
+}
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index 001ac58891e46..15772d158f624 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -98,8 +98,7 @@ define i64 @test9(i64 %x) {
; combine -x & 1 into x & 1
define <2 x i64> @test9vec(<2 x i64> %x) {
; CHECK-LABEL: @test9vec(
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i64> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[SUB]], <i64 1, i64 1>
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> %x, <i64 1, i64 1>
; CHECK-NEXT: ret <2 x i64> [[AND]]
;
%sub = sub nsw <2 x i64> <i64 0, i64 0>, %x
@@ -119,6 +118,88 @@ define i64 @test10(i64 %x) {
ret i64 %add
}
+; (1 << x) & 1 --> zext(x == 0)
+
+define i8 @and1_shl1_is_cmp_eq_0(i8 %x) {
+; CHECK-LABEL: @and1_shl1_is_cmp_eq_0(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 %x, 0
+; CHECK-NEXT: [[AND:%.*]] = zext i1 [[TMP1]] to i8
+; CHECK-NEXT: ret i8 [[AND]]
+;
+ %sh = shl i8 1, %x
+ %and = and i8 %sh, 1
+ ret i8 %and
+}
+
+; Don't do it if the shift has another use.
+
+define i8 @and1_shl1_is_cmp_eq_0_multiuse(i8 %x) {
+; CHECK-LABEL: @and1_shl1_is_cmp_eq_0_multiuse(
+; CHECK-NEXT: [[SH:%.*]] = shl i8 1, %x
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[SH]], 1
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SH]], [[AND]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %sh = shl i8 1, %x
+ %and = and i8 %sh, 1
+ %add = add i8 %sh, %and
+ ret i8 %add
+}
+
+; (1 << x) & 1 --> zext(x == 0)
+
+define <2 x i8> @and1_shl1_is_cmp_eq_0_vec(<2 x i8> %x) {
+; CHECK-LABEL: @and1_shl1_is_cmp_eq_0_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> %x, zeroinitializer
+; CHECK-NEXT: [[AND:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[AND]]
+;
+ %sh = shl <2 x i8> <i8 1, i8 1>, %x
+ %and = and <2 x i8> %sh, <i8 1, i8 1>
+ ret <2 x i8> %and
+}
+
+; (1 >> x) & 1 --> zext(x == 0)
+
+define i8 @and1_lshr1_is_cmp_eq_0(i8 %x) {
+; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 %x, 0
+; CHECK-NEXT: [[AND:%.*]] = zext i1 [[TMP1]] to i8
+; CHECK-NEXT: ret i8 [[AND]]
+;
+ %sh = lshr i8 1, %x
+ %and = and i8 %sh, 1
+ ret i8 %and
+}
+
+; Don't do it if the shift has another use.
+
+define i8 @and1_lshr1_is_cmp_eq_0_multiuse(i8 %x) {
+; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0_multiuse(
+; CHECK-NEXT: [[SH:%.*]] = lshr i8 1, %x
+; CHECK-NEXT: [[AND:%.*]] = and i8 [[SH]], 1
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SH]], [[AND]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+ %sh = lshr i8 1, %x
+ %and = and i8 %sh, 1
+ %add = add i8 %sh, %and
+ ret i8 %add
+}
+
+; (1 >> x) & 1 --> zext(x == 0)
+
+define <2 x i8> @and1_lshr1_is_cmp_eq_0_vec(<2 x i8> %x) {
+; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> %x, zeroinitializer
+; CHECK-NEXT: [[AND:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT: ret <2 x i8> [[AND]]
+;
+ %sh = lshr <2 x i8> <i8 1, i8 1>, %x
+ %and = and <2 x i8> %sh, <i8 1, i8 1>
+ ret <2 x i8> %and
+}
+
; The add in this test is unnecessary because the LSBs of the LHS are 0 and the 'and' only consumes bits from those LSBs. It doesn't matter what happens to the upper bits.
define i32 @test11(i32 %a, i32 %b) {
; CHECK-LABEL: @test11(
diff --git a/test/Transforms/InstCombine/element-atomic-memintrins.ll b/test/Transforms/InstCombine/element-atomic-memintrins.ll
new file mode 100644
index 0000000000000..2e3bfd7b721d6
--- /dev/null
+++ b/test/Transforms/InstCombine/element-atomic-memintrins.ll
@@ -0,0 +1,98 @@
+;; Placeholder tests that will fail once element atomic @llvm.mem[move|set] instrinsics have
+;; been added to the MemIntrinsic class hierarchy. These will act as a reminder to
+;; verify that inst combine handles these intrinsics properly once they have been
+;; added to that class hierarchy.
+
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+;; ---- memset -----
+
+; Ensure 0-length memset isn't removed
+define void @test_memset_zero_length(i8* %dest) {
+ ; CHECK-LABEL: test_memset_zero_length
+ ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 0, i32 1)
+ ; CHECK-NEXT: ret void
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 0, i32 1)
+ ret void
+}
+
+; Ensure that small-sized memsets don't convert to stores
+define void @test_memset_to_store(i8* %dest) {
+ ; CHECK-LABEL: test_memset_to_store
+ ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 1, i32 1)
+ ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 2, i32 1)
+ ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 4, i32 1)
+ ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 8, i32 1)
+ ; CHECK-NEXT: ret void
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 1, i32 1)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 2, i32 1)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 4, i32 1)
+ call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 8, i32 1)
+ ret void
+}
+
+declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture writeonly, i8, i32, i32) nounwind argmemonly
+
+
+;; =========================================
+;; ----- memmove ------
+
+; memmove from a global constant source does not become memcpy
+@gconst = constant [8 x i8] c"0123456\00"
+define void @test_memmove_to_memcpy(i8* %dest) {
+ ; CHECK-LABEL: test_memmove_to_memcpy
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 getelementptr inbounds ([8 x i8], [8 x i8]* @gconst, i64 0, i64 0), i32 8, i32 1)
+ ; CHECK-NEXT: ret void
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 getelementptr inbounds ([8 x i8], [8 x i8]* @gconst, i64 0, i64 0), i32 8, i32 1)
+ ret void
+}
+
+define void @test_memmove_zero_length(i8* %dest, i8* %src) {
+ ; CHECK-LABEL: test_memmove_zero_length
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 0, i32 1)
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 0, i32 2)
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 0, i32 4)
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 0, i32 8)
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 0, i32 16)
+ ; CHECK-NEXT: ret void
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 0, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 0, i32 2)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 0, i32 4)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 0, i32 8)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 0, i32 16)
+ ret void
+}
+
+; memmove with src==dest is removed
+define void @test_memmove_removed(i8* %srcdest, i32 %sz) {
+ ; CHECK-LABEL: test_memmove_removed
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %srcdest, i8* align 1 %srcdest, i32 %sz, i32 1)
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %srcdest, i8* align 2 %srcdest, i32 %sz, i32 2)
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %srcdest, i8* align 4 %srcdest, i32 %sz, i32 4)
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %srcdest, i8* align 8 %srcdest, i32 %sz, i32 8)
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %srcdest, i8* align 16 %srcdest, i32 %sz, i32 16)
+ ; CHECK-NEXT: ret void
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %srcdest, i8* align 1 %srcdest, i32 %sz, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %srcdest, i8* align 2 %srcdest, i32 %sz, i32 2)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %srcdest, i8* align 4 %srcdest, i32 %sz, i32 4)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %srcdest, i8* align 8 %srcdest, i32 %sz, i32 8)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %srcdest, i8* align 16 %srcdest, i32 %sz, i32 16)
+ ret void
+}
+
+; memmove with a small constant length is converted to a load/store pair
+define void @test_memmove_loadstore(i8* %dest, i8* %src) {
+ ; CHECK-LABEL: test_memmove_loadstore
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 1, i32 1)
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 2, i32 1)
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1)
+ ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 8, i32 1)
+ ; CHECK-NEXT: ret void
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 1, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 2, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1)
+ call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 8, i32 1)
+ ret void
+}
+
+declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32) nounwind argmemonly
diff --git a/test/Transforms/InstCombine/icmp-logical.ll b/test/Transforms/InstCombine/icmp-logical.ll
index faae2016e2075..aa95cc5a13164 100644
--- a/test/Transforms/InstCombine/icmp-logical.ll
+++ b/test/Transforms/InstCombine/icmp-logical.ll
@@ -1,159 +1,138 @@
; RUN: opt -instcombine -S -o - %s | FileCheck %s
define i1 @masked_and_notallzeroes(i32 %A) {
-; CHECK-LABEL: @masked_and_notallzeroes
-; CHECK: [[MASK:%.*]] = and i32 %A, 7
-; CHECK: icmp ne i32 [[MASK]], 0
-; CHECK-NOT: and i32 %A, 39
-; CHECK: ret i1
-
+; CHECK-LABEL: @masked_and_notallzeroes(
+; CHECK-NEXT: [[MASK1:%.*]] = and i32 %A, 7
+; CHECK-NEXT: [[TST1:%.*]] = icmp ne i32 [[MASK1]], 0
+; CHECK-NEXT: ret i1 [[TST1]]
+;
%mask1 = and i32 %A, 7
%tst1 = icmp ne i32 %mask1, 0
-
%mask2 = and i32 %A, 39
%tst2 = icmp ne i32 %mask2, 0
-
%res = and i1 %tst1, %tst2
ret i1 %res
}
define i1 @masked_or_allzeroes(i32 %A) {
-; CHECK-LABEL: @masked_or_allzeroes
-; CHECK: [[MASK:%.*]] = and i32 %A, 7
-; CHECK: icmp eq i32 [[MASK]], 0
-; CHECK-NOT: and i32 %A, 39
-; CHECK: ret i1
-
+; CHECK-LABEL: @masked_or_allzeroes(
+; CHECK-NEXT: [[MASK1:%.*]] = and i32 %A, 7
+; CHECK-NEXT: [[TST1:%.*]] = icmp eq i32 [[MASK1]], 0
+; CHECK-NEXT: ret i1 [[TST1]]
+;
%mask1 = and i32 %A, 7
%tst1 = icmp eq i32 %mask1, 0
-
%mask2 = and i32 %A, 39
%tst2 = icmp eq i32 %mask2, 0
-
%res = or i1 %tst1, %tst2
ret i1 %res
}
define i1 @masked_and_notallones(i32 %A) {
-; CHECK-LABEL: @masked_and_notallones
-; CHECK: [[MASK:%.*]] = and i32 %A, 7
-; CHECK: icmp ne i32 [[MASK]], 7
-; CHECK-NOT: and i32 %A, 39
-; CHECK: ret i1
-
+; CHECK-LABEL: @masked_and_notallones(
+; CHECK-NEXT: [[MASK1:%.*]] = and i32 %A, 7
+; CHECK-NEXT: [[TST1:%.*]] = icmp ne i32 [[MASK1]], 7
+; CHECK-NEXT: ret i1 [[TST1]]
+;
%mask1 = and i32 %A, 7
%tst1 = icmp ne i32 %mask1, 7
-
%mask2 = and i32 %A, 39
%tst2 = icmp ne i32 %mask2, 39
-
%res = and i1 %tst1, %tst2
ret i1 %res
}
define i1 @masked_or_allones(i32 %A) {
-; CHECK-LABEL: @masked_or_allones
-; CHECK: [[MASK:%.*]] = and i32 %A, 7
-; CHECK: icmp eq i32 [[MASK]], 7
-; CHECK-NOT: and i32 %A, 39
-; CHECK: ret i1
-
+; CHECK-LABEL: @masked_or_allones(
+; CHECK-NEXT: [[MASK1:%.*]] = and i32 %A, 7
+; CHECK-NEXT: [[TST1:%.*]] = icmp eq i32 [[MASK1]], 7
+; CHECK-NEXT: ret i1 [[TST1]]
+;
%mask1 = and i32 %A, 7
%tst1 = icmp eq i32 %mask1, 7
-
%mask2 = and i32 %A, 39
%tst2 = icmp eq i32 %mask2, 39
-
%res = or i1 %tst1, %tst2
ret i1 %res
}
define i1 @masked_and_notA(i32 %A) {
-; CHECK-LABEL: @masked_and_notA
-; CHECK: [[MASK:%.*]] = and i32 %A, 39
-; CHECK: icmp ne i32 [[MASK]], %A
-; CHECK-NOT: and i32 %A, 7
-; CHECK: ret i1
-
+; CHECK-LABEL: @masked_and_notA(
+; CHECK-NEXT: [[MASK2:%.*]] = and i32 %A, 39
+; CHECK-NEXT: [[TST2:%.*]] = icmp ne i32 [[MASK2]], %A
+; CHECK-NEXT: ret i1 [[TST2]]
+;
%mask1 = and i32 %A, 7
%tst1 = icmp ne i32 %mask1, %A
-
%mask2 = and i32 %A, 39
%tst2 = icmp ne i32 %mask2, %A
-
%res = and i1 %tst1, %tst2
ret i1 %res
}
define i1 @masked_or_A(i32 %A) {
-; CHECK-LABEL: @masked_or_A
-; CHECK: [[MASK:%.*]] = and i32 %A, 39
-; CHECK: icmp eq i32 [[MASK]], %A
-; CHECK-NOT: and i32 %A, 7
-; CHECK: ret i1
-
+; CHECK-LABEL: @masked_or_A(
+; CHECK-NEXT: [[MASK2:%.*]] = and i32 %A, 39
+; CHECK-NEXT: [[TST2:%.*]] = icmp eq i32 [[MASK2]], %A
+; CHECK-NEXT: ret i1 [[TST2]]
+;
%mask1 = and i32 %A, 7
%tst1 = icmp eq i32 %mask1, %A
-
%mask2 = and i32 %A, 39
%tst2 = icmp eq i32 %mask2, %A
-
%res = or i1 %tst1, %tst2
ret i1 %res
}
define i1 @masked_or_allzeroes_notoptimised(i32 %A) {
-; CHECK-LABEL: @masked_or_allzeroes_notoptimised
-; CHECK: [[MASK:%.*]] = and i32 %A, 15
-; CHECK: icmp eq i32 [[MASK]], 0
-; CHECK: [[MASK:%.*]] = and i32 %A, 39
-; CHECK: icmp eq i32 [[MASK]], 0
-; CHECK: ret i1
-
+; CHECK-LABEL: @masked_or_allzeroes_notoptimised(
+; CHECK-NEXT: [[MASK1:%.*]] = and i32 %A, 15
+; CHECK-NEXT: [[TST1:%.*]] = icmp eq i32 [[MASK1]], 0
+; CHECK-NEXT: [[MASK2:%.*]] = and i32 %A, 39
+; CHECK-NEXT: [[TST2:%.*]] = icmp eq i32 [[MASK2]], 0
+; CHECK-NEXT: [[RES:%.*]] = or i1 [[TST1]], [[TST2]]
+; CHECK-NEXT: ret i1 [[RES]]
+;
%mask1 = and i32 %A, 15
%tst1 = icmp eq i32 %mask1, 0
-
%mask2 = and i32 %A, 39
%tst2 = icmp eq i32 %mask2, 0
-
%res = or i1 %tst1, %tst2
ret i1 %res
}
define i1 @nomask_lhs(i32 %in) {
-; CHECK-LABEL: @nomask_lhs
-; CHECK: [[MASK:%.*]] = and i32 %in, 1
-; CHECK: icmp eq i32 [[MASK]], 0
-; CHECK-NOT: icmp
-; CHECK: ret i1
+; CHECK-LABEL: @nomask_lhs(
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 %in, 1
+; CHECK-NEXT: [[TST2:%.*]] = icmp eq i32 [[MASKED]], 0
+; CHECK-NEXT: ret i1 [[TST2]]
+;
%tst1 = icmp eq i32 %in, 0
-
%masked = and i32 %in, 1
%tst2 = icmp eq i32 %masked, 0
-
%val = or i1 %tst1, %tst2
ret i1 %val
}
-
define i1 @nomask_rhs(i32 %in) {
-; CHECK-LABEL: @nomask_rhs
-; CHECK: [[MASK:%.*]] = and i32 %in, 1
-; CHECK: icmp eq i32 [[MASK]], 0
-; CHECK-NOT: icmp
-; CHECK: ret i1
+; CHECK-LABEL: @nomask_rhs(
+; CHECK-NEXT: [[MASKED:%.*]] = and i32 %in, 1
+; CHECK-NEXT: [[TST1:%.*]] = icmp eq i32 [[MASKED]], 0
+; CHECK-NEXT: ret i1 [[TST1]]
+;
%masked = and i32 %in, 1
%tst1 = icmp eq i32 %masked, 0
-
%tst2 = icmp eq i32 %in, 0
-
%val = or i1 %tst1, %tst2
ret i1 %val
}
+; TODO: This test simplifies to a constant, so the functionality and test could be in InstSimplify.
+
define i1 @fold_mask_cmps_to_false(i32 %x) {
-; CHECK-LABEL: @fold_mask_cmps_to_false
-; CHECK: ret i1 false
+; CHECK-LABEL: @fold_mask_cmps_to_false(
+; CHECK-NEXT: ret i1 false
+;
%1 = and i32 %x, 2147483647
%2 = icmp eq i32 %1, 0
%3 = icmp eq i32 %x, 2147483647
@@ -161,12 +140,46 @@ define i1 @fold_mask_cmps_to_false(i32 %x) {
ret i1 %4
}
+; TODO: This test simplifies to a constant, so the functionality and test could be in InstSimplify.
+
define i1 @fold_mask_cmps_to_true(i32 %x) {
-; CHECK-LABEL: @fold_mask_cmps_to_true
-; CHECK: ret i1 true
+; CHECK-LABEL: @fold_mask_cmps_to_true(
+; CHECK-NEXT: ret i1 true
+;
%1 = and i32 %x, 2147483647
%2 = icmp ne i32 %1, 0
%3 = icmp ne i32 %x, 2147483647
%4 = or i1 %3, %2
ret i1 %4
}
+
+; PR32401 - https://bugs.llvm.org/show_bug.cgi?id=32401
+
+define i1 @cmpeq_bitwise(i8 %a, i8 %b, i8 %c, i8 %d) {
+; CHECK-LABEL: @cmpeq_bitwise(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 %c, %d
+; CHECK-NEXT: [[CMP:%.*]] = and i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %xor1 = xor i8 %a, %b
+ %xor2 = xor i8 %c, %d
+ %or = or i8 %xor1, %xor2
+ %cmp = icmp eq i8 %or, 0
+ ret i1 %cmp
+}
+
+define <2 x i1> @cmpne_bitwise(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
+; CHECK-LABEL: @cmpne_bitwise(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i64> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> %c, %d
+; CHECK-NEXT: [[CMP:%.*]] = or <2 x i1> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %xor1 = xor <2 x i64> %a, %b
+ %xor2 = xor <2 x i64> %c, %d
+ %or = or <2 x i64> %xor1, %xor2
+ %cmp = icmp ne <2 x i64> %or, zeroinitializer
+ ret <2 x i1> %cmp
+}
+
diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll
index 947971c6c83b0..be64f51b6c4c5 100644
--- a/test/Transforms/InstCombine/or-xor.ll
+++ b/test/Transforms/InstCombine/or-xor.ll
@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine < %s | FileCheck %s
-define i32 @test1(i32 %x, i32 %y) nounwind {
+; X | ~(X | Y) --> X | ~Y
+
+define i32 @test1(i32 %x, i32 %y) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[Y_NOT:%.*]] = xor i32 %y, -1
; CHECK-NEXT: [[Z:%.*]] = or i32 [[Y_NOT]], %x
@@ -13,7 +15,10 @@ define i32 @test1(i32 %x, i32 %y) nounwind {
ret i32 %z
}
-define i32 @test2(i32 %x, i32 %y) nounwind {
+; Commute (rename) the inner 'or' operands:
+; Y | ~(X | Y) --> ~X | Y
+
+define i32 @test2(i32 %x, i32 %y) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[X_NOT:%.*]] = xor i32 %x, -1
; CHECK-NEXT: [[Z:%.*]] = or i32 [[X_NOT]], %y
@@ -25,7 +30,9 @@ define i32 @test2(i32 %x, i32 %y) nounwind {
ret i32 %z
}
-define i32 @test3(i32 %x, i32 %y) nounwind {
+; X | ~(X ^ Y) --> X | ~Y
+
+define i32 @test3(i32 %x, i32 %y) {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[Y_NOT:%.*]] = xor i32 %y, -1
; CHECK-NEXT: [[Z:%.*]] = or i32 [[Y_NOT]], %x
@@ -37,7 +44,10 @@ define i32 @test3(i32 %x, i32 %y) nounwind {
ret i32 %z
}
-define i32 @test4(i32 %x, i32 %y) nounwind {
+; Commute (rename) the 'xor' operands:
+; Y | ~(X ^ Y) --> ~X | Y
+
+define i32 @test4(i32 %x, i32 %y) {
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[X_NOT:%.*]] = xor i32 %x, -1
; CHECK-NEXT: [[Z:%.*]] = or i32 [[X_NOT]], %y
@@ -49,7 +59,7 @@ define i32 @test4(i32 %x, i32 %y) nounwind {
ret i32 %z
}
-define i32 @test5(i32 %x, i32 %y) nounwind {
+define i32 @test5(i32 %x, i32 %y) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: ret i32 -1
;
@@ -59,7 +69,7 @@ define i32 @test5(i32 %x, i32 %y) nounwind {
ret i32 %z
}
-define i32 @test6(i32 %x, i32 %y) nounwind {
+define i32 @test6(i32 %x, i32 %y) {
; CHECK-LABEL: @test6(
; CHECK-NEXT: ret i32 -1
;
@@ -69,7 +79,7 @@ define i32 @test6(i32 %x, i32 %y) nounwind {
ret i32 %z
}
-define i32 @test7(i32 %x, i32 %y) nounwind {
+define i32 @test7(i32 %x, i32 %y) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: [[Z:%.*]] = or i32 %x, %y
; CHECK-NEXT: ret i32 [[Z]]
@@ -79,7 +89,7 @@ define i32 @test7(i32 %x, i32 %y) nounwind {
ret i32 %z
}
-define i32 @test8(i32 %x, i32 %y) nounwind {
+define i32 @test8(i32 %x, i32 %y) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: [[X_NOT:%.*]] = xor i32 %x, -1
; CHECK-NEXT: [[Z:%.*]] = or i32 [[X_NOT]], %y
@@ -91,7 +101,7 @@ define i32 @test8(i32 %x, i32 %y) nounwind {
ret i32 %z
}
-define i32 @test9(i32 %x, i32 %y) nounwind {
+define i32 @test9(i32 %x, i32 %y) {
; CHECK-LABEL: @test9(
; CHECK-NEXT: [[Y_NOT:%.*]] = xor i32 %y, -1
; CHECK-NEXT: [[Z:%.*]] = or i32 [[Y_NOT]], %x
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index 764fe4503b5e1..fb56449ba4d46 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -397,14 +397,74 @@ define <2 x i132> @orsext_to_sel_vec_swap(<2 x i132> %x, <2 x i1> %y) {
ret <2 x i132> %or
}
-define i32 @test39(i32 %a, i32 %b) {
-; CHECK-LABEL: @test39(
-; CHECK-NEXT: [[OR:%.*]] = or i32 %b, %a
+; (~A & B) | A --> A | B
+
+define i32 @test39a(i32 %a, float %b) {
+; CHECK-LABEL: @test39a(
+; CHECK-NEXT: [[A1:%.*]] = mul i32 %a, 42
+; CHECK-NEXT: [[B1:%.*]] = bitcast float %b to i32
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A1]], [[B1]]
; CHECK-NEXT: ret i32 [[OR]]
;
- %xor = xor i32 %a, -1
- %and = and i32 %xor, %b
- %or = or i32 %and, %a
+ %a1 = mul i32 %a, 42 ; thwart complexity-based ordering
+ %b1 = bitcast float %b to i32 ; thwart complexity-based ordering
+ %nota = xor i32 %a1, -1
+ %and = and i32 %nota, %b1
+ %or = or i32 %and, %a1
+ ret i32 %or
+}
+
+; Commute 'and' operands:
+; (B & ~A) | A --> A | B
+
+define i32 @test39b(i32 %a, float %b) {
+; CHECK-LABEL: @test39b(
+; CHECK-NEXT: [[A1:%.*]] = mul i32 %a, 42
+; CHECK-NEXT: [[B1:%.*]] = bitcast float %b to i32
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A1]], [[B1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %a1 = mul i32 %a, 42 ; thwart complexity-based ordering
+ %b1 = bitcast float %b to i32 ; thwart complexity-based ordering
+ %nota = xor i32 %a1, -1
+ %and = and i32 %b1, %nota
+ %or = or i32 %and, %a1
+ ret i32 %or
+}
+
+; Commute 'or' operands:
+; A | (~A & B) --> A | B
+
+define i32 @test39c(i32 %a, float %b) {
+; CHECK-LABEL: @test39c(
+; CHECK-NEXT: [[A1:%.*]] = mul i32 %a, 42
+; CHECK-NEXT: [[B1:%.*]] = bitcast float %b to i32
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A1]], [[B1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %a1 = mul i32 %a, 42 ; thwart complexity-based ordering
+ %b1 = bitcast float %b to i32 ; thwart complexity-based ordering
+ %nota = xor i32 %a1, -1
+ %and = and i32 %nota, %b1
+ %or = or i32 %a1, %and
+ ret i32 %or
+}
+
+; Commute 'and' operands:
+; A | (B & ~A) --> A | B
+
+define i32 @test39d(i32 %a, float %b) {
+; CHECK-LABEL: @test39d(
+; CHECK-NEXT: [[A1:%.*]] = mul i32 %a, 42
+; CHECK-NEXT: [[B1:%.*]] = bitcast float %b to i32
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[A1]], [[B1]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %a1 = mul i32 %a, 42 ; thwart complexity-based ordering
+ %b1 = bitcast float %b to i32 ; thwart complexity-based ordering
+ %nota = xor i32 %a1, -1
+ %and = and i32 %b1, %nota
+ %or = or i32 %a1, %and
ret i32 %or
}
@@ -456,60 +516,6 @@ define i32 @test40d(i32 %a, i32 %b) {
ret i32 %or
}
-define i32 @test41(i32 %a, i32 %b) {
-; CHECK-LABEL: @test41(
-; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, -1
-; CHECK-NEXT: [[OR:%.*]] = xor i32 [[TMP1]], %b
-; CHECK-NEXT: ret i32 [[OR]]
-;
- %and = and i32 %a, %b
- %nega = xor i32 %a, -1
- %xor = xor i32 %nega, %b
- %or = or i32 %and, %xor
- ret i32 %or
-}
-
-; (~A ^ B) | (A & B) -> (~A ^ B)
-
-define i32 @test42(i32 %a, i32 %b) {
-; CHECK-LABEL: @test42(
-; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, -1
-; CHECK-NEXT: [[OR:%.*]] = xor i32 [[TMP1]], %b
-; CHECK-NEXT: ret i32 [[OR]]
-;
- %nega = xor i32 %a, -1
- %xor = xor i32 %nega, %b
- %and = and i32 %a, %b
- %or = or i32 %xor, %and
- ret i32 %or
-}
-
-define i32 @test42_commuted_and(i32 %a, i32 %b) {
-; CHECK-LABEL: @test42_commuted_and(
-; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, -1
-; CHECK-NEXT: [[OR:%.*]] = xor i32 [[TMP1]], %b
-; CHECK-NEXT: ret i32 [[OR]]
-;
- %nega = xor i32 %a, -1
- %xor = xor i32 %nega, %b
- %and = and i32 %b, %a
- %or = or i32 %xor, %and
- ret i32 %or
-}
-
-define i32 @test42_commuted_xor(i32 %a, i32 %b) {
-; CHECK-LABEL: @test42_commuted_xor(
-; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, -1
-; CHECK-NEXT: [[OR:%.*]] = xor i32 [[TMP1]], %b
-; CHECK-NEXT: ret i32 [[OR]]
-;
- %nega = xor i32 %a, -1
- %xor = xor i32 %b, %nega
- %and = and i32 %a, %b
- %or = or i32 %xor, %and
- ret i32 %or
-}
-
define i32 @test45(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @test45(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 %x, %z
@@ -648,41 +654,146 @@ final:
ret <2 x i32> %value
}
-define i8 @test51(i8 %a, i8 %b, i8 %c) {
-; CHECK-LABEL: @test51(
-; CHECK-NEXT: [[W:%.*]] = mul i8 [[B:%.*]], [[C:%.*]]
-; CHECK-NEXT: [[X:%.*]] = or i8 [[W]], [[A:%.*]]
-; CHECK-NEXT: ret i8 [[X]]
+; In the next 4 tests, vary the types and predicates for extra coverage.
+; (X | (Y & ~X)) -> (X | Y), where 'not' is an inverted cmp
+
+define i1 @or_andn_cmp_1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @or_andn_cmp_1(
+; CHECK-NEXT: [[X:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[X]], [[Y]]
+; CHECK-NEXT: ret i1 [[OR]]
+;
+ %x = icmp sgt i32 %a, %b
+ %x_inv = icmp sle i32 %a, %b
+ %y = icmp ugt i32 %c, 42 ; thwart complexity-based ordering
+ %and = and i1 %y, %x_inv
+ %or = or i1 %x, %and
+ ret i1 %or
+}
+
+; Commute the 'or':
+; ((Y & ~X) | X) -> (X | Y), where 'not' is an inverted cmp
+
+define <2 x i1> @or_andn_cmp_2(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: @or_andn_cmp_2(
+; CHECK-NEXT: [[X:%.*]] = icmp sge <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt <2 x i32> [[C:%.*]], <i32 42, i32 47>
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i1> [[Y]], [[X]]
+; CHECK-NEXT: ret <2 x i1> [[OR]]
+;
+ %x = icmp sge <2 x i32> %a, %b
+ %x_inv = icmp slt <2 x i32> %a, %b
+ %y = icmp ugt <2 x i32> %c, <i32 42, i32 47> ; thwart complexity-based ordering
+ %and = and <2 x i1> %y, %x_inv
+ %or = or <2 x i1> %and, %x
+ ret <2 x i1> %or
+}
+
+; Commute the 'and':
+; (X | (~X & Y)) -> (X | Y), where 'not' is an inverted cmp
+
+define i1 @or_andn_cmp_3(i72 %a, i72 %b, i72 %c) {
+; CHECK-LABEL: @or_andn_cmp_3(
+; CHECK-NEXT: [[X:%.*]] = icmp ugt i72 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i72 [[C:%.*]], 42
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[X]], [[Y]]
+; CHECK-NEXT: ret i1 [[OR]]
+;
+ %x = icmp ugt i72 %a, %b
+ %x_inv = icmp ule i72 %a, %b
+ %y = icmp ugt i72 %c, 42 ; thwart complexity-based ordering
+ %and = and i1 %x_inv, %y
+ %or = or i1 %x, %and
+ ret i1 %or
+}
+
+; Commute the 'or':
+; ((~X & Y) | X) -> (X | Y), where 'not' is an inverted cmp
+
+define <3 x i1> @or_andn_cmp_4(<3 x i32> %a, <3 x i32> %b, <3 x i32> %c) {
+; CHECK-LABEL: @or_andn_cmp_4(
+; CHECK-NEXT: [[X:%.*]] = icmp eq <3 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt <3 x i32> [[C:%.*]], <i32 42, i32 43, i32 -1>
+; CHECK-NEXT: [[OR:%.*]] = or <3 x i1> [[Y]], [[X]]
+; CHECK-NEXT: ret <3 x i1> [[OR]]
+;
+ %x = icmp eq <3 x i32> %a, %b
+ %x_inv = icmp ne <3 x i32> %a, %b
+ %y = icmp ugt <3 x i32> %c, <i32 42, i32 43, i32 -1> ; thwart complexity-based ordering
+ %and = and <3 x i1> %x_inv, %y
+ %or = or <3 x i1> %and, %x
+ ret <3 x i1> %or
+}
+
+; In the next 4 tests, vary the types and predicates for extra coverage.
+; (~X | (Y & X)) -> (~X | Y), where 'not' is an inverted cmp
+
+define i1 @orn_and_cmp_1(i37 %a, i37 %b, i37 %c) {
+; CHECK-LABEL: @orn_and_cmp_1(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp sle i37 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i37 [[C:%.*]], 42
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[X_INV]], [[Y]]
+; CHECK-NEXT: ret i1 [[OR]]
+;
+ %x = icmp sgt i37 %a, %b
+ %x_inv = icmp sle i37 %a, %b
+ %y = icmp ugt i37 %c, 42 ; thwart complexity-based ordering
+ %and = and i1 %y, %x
+ %or = or i1 %x_inv, %and
+ ret i1 %or
+}
+
+; Commute the 'or':
+; ((Y & X) | ~X) -> (~X | Y), where 'not' is an inverted cmp
+
+define i1 @orn_and_cmp_2(i16 %a, i16 %b, i16 %c) {
+; CHECK-LABEL: @orn_and_cmp_2(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i16 [[C:%.*]], 42
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[Y]], [[X_INV]]
+; CHECK-NEXT: ret i1 [[OR]]
;
- %w = mul i8 %b, %c
- %z = xor i8 %a, -1
- %y = and i8 %w, %z
- %x = or i8 %y, %a
- ret i8 %x
+ %x = icmp sge i16 %a, %b
+ %x_inv = icmp slt i16 %a, %b
+ %y = icmp ugt i16 %c, 42 ; thwart complexity-based ordering
+ %and = and i1 %y, %x
+ %or = or i1 %and, %x_inv
+ ret i1 %or
}
-define i8 @test52(i8 %a, i8 %b, i8 %c) {
-; CHECK-LABEL: @test52(
-; CHECK-NEXT: [[W:%.*]] = mul i8 [[B:%.*]], [[C:%.*]]
-; CHECK-NEXT: [[X:%.*]] = or i8 [[W]], [[A:%.*]]
-; CHECK-NEXT: ret i8 [[X]]
+; Commute the 'and':
+; (~X | (X & Y)) -> (~X | Y), where 'not' is an inverted cmp
+
+define <4 x i1> @orn_and_cmp_3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: @orn_and_cmp_3(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt <4 x i32> [[C:%.*]], <i32 42, i32 0, i32 1, i32 -1>
+; CHECK-NEXT: [[OR:%.*]] = or <4 x i1> [[X_INV]], [[Y]]
+; CHECK-NEXT: ret <4 x i1> [[OR]]
;
- %w = mul i8 %b, %c
- %z = xor i8 %w, -1
- %y = and i8 %z, %a
- %x = or i8 %w, %y
- ret i8 %x
+ %x = icmp ugt <4 x i32> %a, %b
+ %x_inv = icmp ule <4 x i32> %a, %b
+ %y = icmp ugt <4 x i32> %c, <i32 42, i32 0, i32 1, i32 -1> ; thwart complexity-based ordering
+ %and = and <4 x i1> %x, %y
+ %or = or <4 x i1> %x_inv, %and
+ ret <4 x i1> %or
}
-define i8 @test53(i8 %a, i8 %b, i8 %c) {
-; CHECK-LABEL: @test53(
-; CHECK-NEXT: [[W:%.*]] = mul i8 [[B:%.*]], [[C:%.*]]
-; CHECK-NEXT: [[X:%.*]] = or i8 [[W]], [[A:%.*]]
-; CHECK-NEXT: ret i8 [[X]]
+; Commute the 'or':
+; ((X & Y) | ~X) -> (~X | Y), where 'not' is an inverted cmp
+
+define i1 @orn_and_cmp_4(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @orn_and_cmp_4(
+; CHECK-NEXT: [[X_INV:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42
+; CHECK-NEXT: [[OR:%.*]] = or i1 [[Y]], [[X_INV]]
+; CHECK-NEXT: ret i1 [[OR]]
;
- %w = mul i8 %b, %c
- %z = xor i8 %w, -1
- %y = and i8 %z, %a
- %x = or i8 %w, %y
- ret i8 %x
+ %x = icmp eq i32 %a, %b
+ %x_inv = icmp ne i32 %a, %b
+ %y = icmp ugt i32 %c, 42 ; thwart complexity-based ordering
+ %and = and i1 %x, %y
+ %or = or i1 %and, %x_inv
+ ret i1 %or
}
diff --git a/test/Transforms/InstCombine/pr33765.ll b/test/Transforms/InstCombine/pr33765.ll
new file mode 100644
index 0000000000000..99ed0d13b5cf5
--- /dev/null
+++ b/test/Transforms/InstCombine/pr33765.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S %s -instcombine | FileCheck %s
+
+@glob = external global i16
+
+define void @patatino(i8 %beth) {
+; CHECK-LABEL: @patatino(
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[BETH:%.*]] to i32
+; CHECK-NEXT: br i1 undef, label [[IF_THEN9:%.*]], label [[IF_THEN9]]
+; CHECK: if.then9:
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
+; CHECK-NEXT: [[TINKY:%.*]] = load i16, i16* @glob, align 2
+; CHECK-NEXT: [[CONV131:%.*]] = zext i16 [[TINKY]] to i32
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[MUL]], [[CONV131]]
+; CHECK-NEXT: [[CONV14:%.*]] = trunc i32 [[AND]] to i16
+; CHECK-NEXT: store i16 [[CONV14]], i16* @glob, align 2
+; CHECK-NEXT: ret void
+;
+ %conv = zext i8 %beth to i32
+ %mul = mul nuw nsw i32 %conv, %conv
+ %conv3 = and i32 %mul, 255
+ %tobool8 = icmp ne i32 %mul, %conv3
+ br i1 %tobool8, label %if.then9, label %if.then9
+
+if.then9:
+ %tinky = load i16, i16* @glob
+ %conv13 = sext i16 %tinky to i32
+ %and = and i32 %mul, %conv13
+ %conv14 = trunc i32 %and to i16
+ store i16 %conv14, i16* @glob
+ ret void
+}
diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll
index 6a3cf7edd7dcd..5e84ec54971a0 100644
--- a/test/Transforms/JumpThreading/select.ll
+++ b/test/Transforms/JumpThreading/select.ll
@@ -280,10 +280,85 @@ cond.false.15.i: ; preds = %cond.false.10.i
ret i32 %j.add3
; CHECK-LABEL: @unfold3
-; CHECK: br i1 %cmp.i, label %.exit.thread2, label %cond.false.i
+; CHECK: br i1 %cmp.i, label %.exit.thread2, label %cond.false.i
; CHECK: br i1 %cmp4.i, label %.exit.thread, label %cond.false.6.i
; CHECK: br i1 %cmp8.i, label %.exit.thread2, label %cond.false.10.i
; CHECK: br i1 %cmp13.i, label %.exit.thread, label %.exit
; CHECK: br i1 %phitmp, label %.exit.thread, label %.exit.thread2
; CHECK: br label %.exit.thread2
}
+
+define i32 @unfold4(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind {
+entry:
+ %add3 = add nsw i32 %j, 2
+ %cmp.i = icmp slt i32 %u, %v
+ br i1 %cmp.i, label %.exit, label %cond.false.i
+
+cond.false.i: ; preds = %entry
+ %cmp4.i = icmp sgt i32 %u, %v
+ br i1 %cmp4.i, label %.exit, label %cond.false.6.i
+
+cond.false.6.i: ; preds = %cond.false.i
+ %cmp8.i = icmp slt i32 %w, %x
+ br i1 %cmp8.i, label %.exit, label %cond.false.10.i
+
+cond.false.10.i: ; preds = %cond.false.6.i
+ %cmp13.i = icmp sgt i32 %w, %x
+ br i1 %cmp13.i, label %.exit, label %cond.false.15.i
+
+cond.false.15.i: ; preds = %cond.false.10.i
+ %cmp19.i = icmp sge i32 %y, %z
+ %conv = zext i1 %cmp19.i to i32
+ br label %.exit
+
+.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i
+ %cond23.i = phi i32 [ 1, %entry ], [ 0, %cond.false.i ], [ 1, %cond.false.6.i ], [ %conv, %cond.false.15.i ], [ 0, %cond.false.10.i ]
+ %lnot.i18 = icmp eq i32 %cond23.i, 1
+ %j.add3 = select i1 %lnot.i18, i32 %j, i32 %add3
+ ret i32 %j.add3
+
+; CHECK-LABEL: @unfold4
+; CHECK: br i1 %cmp.i, label %.exit.thread, label %cond.false.i
+; CHECK: br i1 %cmp4.i, label %.exit.thread3, label %cond.false.6.i
+; CHECK: br i1 %cmp8.i, label %.exit.thread, label %cond.false.10.i
+; CHECK: br i1 %cmp13.i, label %.exit.thread3, label %.exit
+; CHECK: br i1 %lnot.i18, label %.exit.thread, label %.exit.thread3
+; CHECK: br label %.exit.thread3
+}
+
+define i32 @unfold5(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind {
+entry:
+ %add3 = add nsw i32 %j, 2
+ %cmp.i = icmp slt i32 %u, %v
+ br i1 %cmp.i, label %.exit, label %cond.false.i
+
+cond.false.i: ; preds = %entry
+ %cmp4.i = icmp sgt i32 %u, %v
+ br i1 %cmp4.i, label %.exit, label %cond.false.6.i
+
+cond.false.6.i: ; preds = %cond.false.i
+ %cmp8.i = icmp slt i32 %w, %x
+ br i1 %cmp8.i, label %.exit, label %cond.false.10.i
+
+cond.false.10.i: ; preds = %cond.false.6.i
+ %cmp13.i = icmp sgt i32 %w, %x
+ br i1 %cmp13.i, label %.exit, label %cond.false.15.i
+
+cond.false.15.i: ; preds = %cond.false.10.i
+ %cmp19.i = icmp sge i32 %y, %z
+ %conv = zext i1 %cmp19.i to i32
+ br label %.exit
+
+.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i
+ %cond23.i = phi i32 [ 2, %entry ], [ 3, %cond.false.i ], [ 1, %cond.false.6.i ], [ %conv, %cond.false.15.i ], [ 7, %cond.false.10.i ]
+ %lnot.i18 = icmp sgt i32 %cond23.i, 5
+ %j.add3 = select i1 %lnot.i18, i32 %j, i32 %cond23.i
+ ret i32 %j.add3
+
+; CHECK-LABEL: @unfold5
+; CHECK: br i1 %cmp.i, label %.exit, label %cond.false.i
+; CHECK: br i1 %cmp4.i, label %.exit, label %cond.false.6.i
+; CHECK: br i1 %cmp8.i, label %.exit, label %cond.false.10.i
+; CHECK: br i1 %cmp13.i, label %.exit, label %cond.false.15.i
+; CHECK: br label %.exit
+}
diff --git a/test/Transforms/LoopInterchange/current-limitations-lcssa.ll b/test/Transforms/LoopInterchange/current-limitations-lcssa.ll
new file mode 100644
index 0000000000000..df6c6cfdbcb5d
--- /dev/null
+++ b/test/Transforms/LoopInterchange/current-limitations-lcssa.ll
@@ -0,0 +1,76 @@
+; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
+;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [100 x [100 x i32]] zeroinitializer
+@C = common global [100 x [100 x i32]] zeroinitializer
+
+;; FIXME:
+;; Test for interchange when we have an lcssa phi. This should ideally be interchanged but it is currently not supported.
+;; for(gi=1;gi<N;gi++)
+;; for(gj=1;gj<M;gj++)
+;; A[gj][gi] = A[gj - 1][gi] + C[gj][gi];
+
+@gi = common global i32 0
+@gj = common global i32 0
+
+define void @interchange_07(i32 %N, i32 %M){
+entry:
+ store i32 1, i32* @gi
+ %cmp21 = icmp sgt i32 %N, 1
+ br i1 %cmp21, label %for.cond1.preheader.lr.ph, label %for.end16
+
+for.cond1.preheader.lr.ph:
+ %cmp218 = icmp sgt i32 %M, 1
+ %gi.promoted = load i32, i32* @gi
+ %0 = add i32 %M, -1
+ %1 = sext i32 %gi.promoted to i64
+ %2 = sext i32 %N to i64
+ %3 = add i32 %gi.promoted, 1
+ %4 = icmp slt i32 %3, %N
+ %smax = select i1 %4, i32 %N, i32 %3
+ br label %for.cond1.preheader
+
+for.cond1.preheader:
+ %indvars.iv25 = phi i64 [ %1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next26, %for.inc14 ]
+ br i1 %cmp218, label %for.body3, label %for.inc14
+
+for.body3:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.cond1.preheader ]
+ %5 = add nsw i64 %indvars.iv, -1
+ %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %5, i64 %indvars.iv25
+ %6 = load i32, i32* %arrayidx5
+ %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %indvars.iv, i64 %indvars.iv25
+ %7 = load i32, i32* %arrayidx9
+ %add = add nsw i32 %7, %6
+ %arrayidx13 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv25
+ store i32 %add, i32* %arrayidx13
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %0
+ br i1 %exitcond, label %for.inc14, label %for.body3
+
+for.inc14:
+ %inc.lcssa23 = phi i32 [ 1, %for.cond1.preheader ], [ %M, %for.body3 ]
+ %indvars.iv.next26 = add nsw i64 %indvars.iv25, 1
+ %cmp = icmp slt i64 %indvars.iv.next26, %2
+ br i1 %cmp, label %for.cond1.preheader, label %for.cond.for.end16_crit_edge
+
+for.cond.for.end16_crit_edge:
+ store i32 %inc.lcssa23, i32* @gj
+ store i32 %smax, i32* @gi
+ br label %for.end16
+
+for.end16:
+ ret void
+}
+
+; CHECK-LABEL: @interchange_07
+; CHECK: for.body3: ; preds = %for.body3.preheader, %for.body3
+; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.body3.preheader ]
+; CHECK: %5 = add nsw i64 %indvars.iv, -1
+; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %5, i64 %indvars.iv25
+; CHECK: %6 = load i32, i32* %arrayidx5
+; CHECK: %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %indvars.iv, i64 %indvars.iv25
diff --git a/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll b/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll
new file mode 100644
index 0000000000000..c3b0b9291424b
--- /dev/null
+++ b/test/Transforms/LoopInterchange/interchange-flow-dep-outer.ll
@@ -0,0 +1,118 @@
+; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
+;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [100 x [100 x i32]] zeroinitializer
+@B = common global [100 x i32] zeroinitializer
+@C = common global [100 x [100 x i32]] zeroinitializer
+@D = common global [100 x [100 x [100 x i32]]] zeroinitializer
+
+;; Test that a flow dependency in outer loop doesn't prevent interchange in
+;; loops i and j.
+;;
+;; for (int k = 0; k < 100; ++k) {
+;; T[k] = fn1();
+;; for (int i = 0; i < 1000; ++i)
+;; for(int j = 1; j < 1000; ++j)
+;; Arr[j][i] = Arr[j][i]+k;
+;; fn2(T[k]);
+;; }
+
+@T = internal global [100 x double] zeroinitializer, align 4
+@Arr = internal global [1000 x [1000 x i32]] zeroinitializer, align 4
+
+define void @interchange_09(i32 %k) {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.cond.cleanup4
+ ret void
+
+for.body: ; preds = %for.cond.cleanup4, %entry
+ %indvars.iv45 = phi i64 [ 0, %entry ], [ %indvars.iv.next46, %for.cond.cleanup4 ]
+ %call = call double @fn1()
+ %arrayidx = getelementptr inbounds [100 x double], [100 x double]* @T, i64 0, i64 %indvars.iv45
+ store double %call, double* %arrayidx, align 8
+ br label %for.cond6.preheader
+
+for.cond6.preheader: ; preds = %for.cond.cleanup8, %for.body
+ %indvars.iv42 = phi i64 [ 0, %for.body ], [ %indvars.iv.next43, %for.cond.cleanup8 ]
+ br label %for.body9
+
+for.cond.cleanup4: ; preds = %for.cond.cleanup8
+ %tmp = load double, double* %arrayidx, align 8
+ call void @fn2(double %tmp)
+ %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1
+ %exitcond47 = icmp ne i64 %indvars.iv.next46, 100
+ br i1 %exitcond47, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup8: ; preds = %for.body9
+ %indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1
+ %exitcond44 = icmp ne i64 %indvars.iv.next43, 1000
+ br i1 %exitcond44, label %for.cond6.preheader, label %for.cond.cleanup4
+
+for.body9: ; preds = %for.body9, %for.cond6.preheader
+ %indvars.iv = phi i64 [ 1, %for.cond6.preheader ], [ %indvars.iv.next, %for.body9 ]
+ %arrayidx13 = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv42
+ %tmp1 = load i32, i32* %arrayidx13, align 4
+ %tmp2 = trunc i64 %indvars.iv45 to i32
+ %add = add nsw i32 %tmp1, %tmp2
+ store i32 %add, i32* %arrayidx13, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 1000
+ br i1 %exitcond, label %for.body9, label %for.cond.cleanup8
+}
+
+declare double @fn1()
+declare void @fn2(double)
+
+
+;; After interchange %indvars.iv (j) should increment as the middle loop.
+;; After interchange %indvars.iv42 (i) should increment with the inner most loop.
+
+; CHECK-LABEL: @interchange_09
+
+; CHECK: for.body:
+; CHECK: %indvars.iv45 = phi i64 [ %indvars.iv.next46, %for.cond.cleanup4 ], [ 0, %for.body.preheader ]
+; CHECK: %call = call double @fn1()
+; CHECK: %arrayidx = getelementptr inbounds [100 x double], [100 x double]* @T, i64 0, i64 %indvars.iv45
+; CHECK: store double %call, double* %arrayidx, align 8
+; CHECK: br label %for.body9.preheader
+
+; CHECK: for.cond6.preheader.preheader:
+; CHECK: br label %for.cond6.preheader
+
+; CHECK: for.cond6.preheader:
+; CHECK: %indvars.iv42 = phi i64 [ %indvars.iv.next43, %for.cond.cleanup8 ], [ 0, %for.cond6.preheader.preheader ]
+; CHECK: br label %for.body9.split1
+
+; CHECK: for.body9.preheader:
+; CHECK: br label %for.body9
+
+; CHECK: for.cond.cleanup4:
+; CHECK: %tmp = load double, double* %arrayidx, align 8
+; CHECK: call void @fn2(double %tmp)
+; CHECK: %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1
+; CHECK: %exitcond47 = icmp ne i64 %indvars.iv.next46, 100
+; CHECK: br i1 %exitcond47, label %for.body, label %for.cond.cleanup
+
+; CHECK: for.cond.cleanup8:
+; CHECK: %indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1
+; CHECK: %exitcond44 = icmp ne i64 %indvars.iv.next43, 1000
+; CHECK: br i1 %exitcond44, label %for.cond6.preheader, label %for.body9.split
+
+; CHECK: for.body9:
+; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body9.split ], [ 1, %for.body9.preheader ]
+; CHECK: br label %for.cond6.preheader.preheader
+
+; CHECK: for.body9.split1:
+; CHECK: %arrayidx13 = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv42
+; CHECK: store i32 %add, i32* %arrayidx13, align 4
+; CHECK: br label %for.cond.cleanup8
+
+; CHECK: for.body9.split:
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, 1000
+; CHECK: br i1 %exitcond, label %for.body9, label %for.cond.cleanup4
diff --git a/test/Transforms/LoopInterchange/interchange-not-profitable.ll b/test/Transforms/LoopInterchange/interchange-not-profitable.ll
new file mode 100644
index 0000000000000..67a63cab08bd1
--- /dev/null
+++ b/test/Transforms/LoopInterchange/interchange-not-profitable.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
+;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [100 x [100 x i32]] zeroinitializer
+@B = common global [100 x i32] zeroinitializer
+
+;; Loops should not be interchanged in this case as it is not profitable.
+;; for(int i=0;i<100;i++)
+;; for(int j=0;j<100;j++)
+;; A[i][j] = A[i][j]+k;
+
+define void @interchange_03(i32 %k) {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader:
+ %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc10 ]
+ br label %for.body3
+
+for.body3:
+ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv21, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx5
+ %add = add nsw i32 %0, %k
+ store i32 %add, i32* %arrayidx5
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 100
+ br i1 %exitcond, label %for.inc10, label %for.body3
+
+for.inc10:
+ %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
+ %exitcond23 = icmp eq i64 %indvars.iv.next22, 100
+ br i1 %exitcond23, label %for.end12, label %for.cond1.preheader
+
+for.end12:
+ ret void
+}
+
+; CHECK-LABEL: @interchange_03
+; CHECK: entry:
+; CHECK: br label %for.cond1.preheader.preheader
+; CHECK: for.cond1.preheader.preheader: ; preds = %entry
+; CHECK: br label %for.cond1.preheader
+; CHECK: for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc10
+; CHECK: %indvars.iv21 = phi i64 [ %indvars.iv.next22, %for.inc10 ], [ 0, %for.cond1.preheader.preheader ]
+; CHECK: br label %for.body3.preheader
+; CHECK: for.body3.preheader: ; preds = %for.cond1.preheader
+; CHECK: br label %for.body3
+; CHECK: for.body3: ; preds = %for.body3.preheader, %for.body3
+; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.body3.preheader ]
+; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv21, i64 %indvars.iv
+; CHECK: %0 = load i32, i32* %arrayidx5
+; CHECK: %add = add nsw i32 %0, %k
+; CHECK: store i32 %add, i32* %arrayidx5
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %exitcond = icmp eq i64 %indvars.iv.next, 100
+; CHECK: br i1 %exitcond, label %for.inc10, label %for.body3
+; CHECK: for.inc10: ; preds = %for.body3
+; CHECK: %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
+; CHECK: %exitcond23 = icmp eq i64 %indvars.iv.next22, 100
+; CHECK: br i1 %exitcond23, label %for.end12, label %for.cond1.preheader
+; CHECK: for.end12: ; preds = %for.inc10
+; CHECK: ret void
diff --git a/test/Transforms/LoopInterchange/interchange-output-dependencies.ll b/test/Transforms/LoopInterchange/interchange-output-dependencies.ll
new file mode 100644
index 0000000000000..98deba96f8c6f
--- /dev/null
+++ b/test/Transforms/LoopInterchange/interchange-output-dependencies.ll
@@ -0,0 +1,86 @@
+; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
+;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [100 x [100 x i32]] zeroinitializer
+
+;; Test to make sure we can handle output dependencies.
+;;
+;; for (int i = 0; i < 2; ++i)
+;; for(int j = 0; j < 3; ++j) {
+;; A[j][i] = i;
+;; A[j][i+1] = j;
+;; }
+
+@A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16
+
+define void @interchange_10() {
+entry:
+ br label %for.cond1.preheader
+
+for.cond.loopexit: ; preds = %for.body4
+ %exitcond28 = icmp ne i64 %indvars.iv.next27, 2
+ br i1 %exitcond28, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.cond1.preheader: ; preds = %for.cond.loopexit, %entry
+ %indvars.iv26 = phi i64 [ 0, %entry ], [ %indvars.iv.next27, %for.cond.loopexit ]
+ %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
+ br label %for.body4
+
+for.cond.cleanup: ; preds = %for.cond.loopexit
+ ret void
+
+for.body4: ; preds = %for.body4, %for.cond1.preheader
+ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
+ %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
+ %tmp = trunc i64 %indvars.iv26 to i32
+ store i32 %tmp, i32* %arrayidx6, align 4
+ %arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
+ %tmp1 = trunc i64 %indvars.iv to i32
+ store i32 %tmp1, i32* %arrayidx10, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 3
+ br i1 %exitcond, label %for.body4, label %for.cond.loopexit
+}
+
+; CHECK-LABEL: @interchange_10
+; CHECK: entry:
+; CHECK: br label %for.body4.preheader
+
+; CHECK: for.cond1.preheader.preheader:
+; CHECK: br label %for.cond1.preheader
+
+; CHECK: for.cond.loopexit:
+; CHECK: %exitcond28 = icmp ne i64 %indvars.iv.next27, 2
+; CHECK: br i1 %exitcond28, label %for.cond1.preheader, label %for.body4.split
+
+; CHECK: for.cond1.preheader:
+; CHECK: %indvars.iv26 = phi i64 [ %indvars.iv.next27, %for.cond.loopexit ], [ 0, %for.cond1.preheader.preheader ]
+; CHECK: %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
+; CHECK: br label %for.body4.split1
+
+; CHECK: for.body4.preheader:
+; CHECK: br label %for.body4
+
+; CHECK: for.cond.cleanup:
+; CHECK: ret void
+
+; CHECK: for.body4:
+; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4.split ], [ 0, %for.body4.preheader ]
+; CHECK: br label %for.cond1.preheader.preheader
+
+; CHECK: for.body4.split1:
+; CHECK: %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
+; CHECK: %tmp = trunc i64 %indvars.iv26 to i32
+; CHECK: store i32 %tmp, i32* %arrayidx6, align 4
+; CHECK: %arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
+; CHECK: %tmp1 = trunc i64 %indvars.iv to i32
+; CHECK: store i32 %tmp1, i32* %arrayidx10, align 4
+; CHECK: br label %for.cond.loopexit
+
+; CHECK: for.body4.split:
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, 3
+; CHECK: br i1 %exitcond, label %for.body4, label %for.cond.cleanup
diff --git a/test/Transforms/LoopInterchange/interchange-simple-count-down.ll b/test/Transforms/LoopInterchange/interchange-simple-count-down.ll
new file mode 100644
index 0000000000000..70ba5940257f5
--- /dev/null
+++ b/test/Transforms/LoopInterchange/interchange-simple-count-down.ll
@@ -0,0 +1,69 @@
+; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
+;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [100 x [100 x i32]] zeroinitializer
+@B = common global [100 x i32] zeroinitializer
+
+;; for(int i=0;i<100;i++)
+;; for(int j=100;j>=0;j--)
+;; A[j][i] = A[j][i]+k;
+
+define void @interchange_02(i32 %k) {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader:
+ %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc10 ]
+ br label %for.body3
+
+for.body3:
+ %indvars.iv = phi i64 [ 100, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv19
+ %0 = load i32, i32* %arrayidx5
+ %add = add nsw i32 %0, %k
+ store i32 %add, i32* %arrayidx5
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ %cmp2 = icmp sgt i64 %indvars.iv, 0
+ br i1 %cmp2, label %for.body3, label %for.inc10
+
+for.inc10:
+ %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
+ %exitcond = icmp eq i64 %indvars.iv.next20, 100
+ br i1 %exitcond, label %for.end11, label %for.cond1.preheader
+
+for.end11:
+ ret void
+}
+
+; CHECK-LABEL: @interchange_02
+; CHECK: entry:
+; CHECK: br label %for.body3.preheader
+; CHECK: for.cond1.preheader.preheader:
+; CHECK: br label %for.cond1.preheader
+; CHECK: for.cond1.preheader:
+; CHECK: %indvars.iv19 = phi i64 [ %indvars.iv.next20, %for.inc10 ], [ 0, %for.cond1.preheader.preheader ]
+; CHECK: br label %for.body3.split1
+; CHECK: for.body3.preheader:
+; CHECK: br label %for.body3
+; CHECK: for.body3:
+; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 100, %for.body3.preheader ]
+; CHECK: br label %for.cond1.preheader.preheader
+; CHECK: for.body3.split1: ; preds = %for.cond1.preheader
+; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv19
+; CHECK: %0 = load i32, i32* %arrayidx5
+; CHECK: %add = add nsw i32 %0, %k
+; CHECK: store i32 %add, i32* %arrayidx5
+; CHECK: br label %for.inc10
+; CHECK: for.body3.split:
+; CHECK: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK: %cmp2 = icmp sgt i64 %indvars.iv, 0
+; CHECK: br i1 %cmp2, label %for.body3, label %for.end11
+; CHECK: for.inc10:
+; CHECK: %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
+; CHECK: %exitcond = icmp eq i64 %indvars.iv.next20, 100
+; CHECK: br i1 %exitcond, label %for.body3.split, label %for.cond1.preheader
+; CHECK: for.end11:
+; CHECK: ret void
diff --git a/test/Transforms/LoopInterchange/interchange-simple-count-up.ll b/test/Transforms/LoopInterchange/interchange-simple-count-up.ll
new file mode 100644
index 0000000000000..4febe0269810d
--- /dev/null
+++ b/test/Transforms/LoopInterchange/interchange-simple-count-up.ll
@@ -0,0 +1,86 @@
+; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
+;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [100 x [100 x i32]] zeroinitializer
+@B = common global [100 x i32] zeroinitializer
+
+;; for(int i=0;i<N;i++)
+;; for(int j=1;j<N;j++)
+;; A[j][i] = A[j][i]+k;
+
+define void @interchange_01(i32 %k, i32 %N) {
+entry:
+ %cmp21 = icmp sgt i32 %N, 0
+ br i1 %cmp21, label %for.cond1.preheader.lr.ph, label %for.end12
+
+for.cond1.preheader.lr.ph:
+ %cmp219 = icmp sgt i32 %N, 1
+ %0 = add i32 %N, -1
+ br label %for.cond1.preheader
+
+for.cond1.preheader:
+ %indvars.iv23 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next24, %for.inc10 ]
+ br i1 %cmp219, label %for.body3, label %for.inc10
+
+for.body3:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.cond1.preheader ]
+ %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+ %1 = load i32, i32* %arrayidx5
+ %add = add nsw i32 %1, %k
+ store i32 %add, i32* %arrayidx5
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %0
+ br i1 %exitcond, label %for.inc10, label %for.body3
+
+for.inc10:
+ %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+ %lftr.wideiv25 = trunc i64 %indvars.iv23 to i32
+ %exitcond26 = icmp eq i32 %lftr.wideiv25, %0
+ br i1 %exitcond26, label %for.end12, label %for.cond1.preheader
+
+for.end12:
+ ret void
+}
+
+; CHECK-LABEL: @interchange_01
+; CHECK: entry:
+; CHECK: %cmp21 = icmp sgt i32 %N, 0
+; CHECK: br i1 %cmp21, label %for.body3.preheader, label %for.end12
+; CHECK: for.cond1.preheader.lr.ph:
+; CHECK: br label %for.cond1.preheader
+; CHECK: for.cond1.preheader:
+; CHECK: %indvars.iv23 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next24, %for.inc10 ]
+; CHECK: br i1 %cmp219, label %for.body3.split1, label %for.end12.loopexit
+; CHECK: for.body3.preheader:
+; CHECK: %cmp219 = icmp sgt i32 %N, 1
+; CHECK: %0 = add i32 %N, -1
+; CHECK: br label %for.body3
+; CHECK: for.body3:
+; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 1, %for.body3.preheader ]
+; CHECK: br label %for.cond1.preheader.lr.ph
+; CHECK: for.body3.split1:
+; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+; CHECK: %1 = load i32, i32* %arrayidx5
+; CHECK: %add = add nsw i32 %1, %k
+; CHECK: store i32 %add, i32* %arrayidx5
+; CHECK: br label %for.inc10.loopexit
+; CHECK: for.body3.split:
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %lftr.wideiv = trunc i64 %indvars.iv to i32
+; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, %0
+; CHECK: br i1 %exitcond, label %for.end12.loopexit, label %for.body3
+; CHECK: for.inc10.loopexit:
+; CHECK: br label %for.inc10
+; CHECK: for.inc10:
+; CHECK: %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+; CHECK: %lftr.wideiv25 = trunc i64 %indvars.iv23 to i32
+; CHECK: %exitcond26 = icmp eq i32 %lftr.wideiv25, %0
+; CHECK: br i1 %exitcond26, label %for.body3.split, label %for.cond1.preheader
+; CHECK: for.end12.loopexit:
+; CHECK: br label %for.end12
+; CHECK: for.end12:
+; CHECK: ret void
diff --git a/test/Transforms/LoopInterchange/interchange.ll b/test/Transforms/LoopInterchange/interchange.ll
deleted file mode 100644
index 77b33e43bedc7..0000000000000
--- a/test/Transforms/LoopInterchange/interchange.ll
+++ /dev/null
@@ -1,749 +0,0 @@
-; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
-;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-@A = common global [100 x [100 x i32]] zeroinitializer
-@B = common global [100 x i32] zeroinitializer
-@C = common global [100 x [100 x i32]] zeroinitializer
-@D = common global [100 x [100 x [100 x i32]]] zeroinitializer
-
-declare void @foo(...)
-
-;;--------------------------------------Test case 01------------------------------------
-;; for(int i=0;i<N;i++)
-;; for(int j=1;j<N;j++)
-;; A[j][i] = A[j][i]+k;
-
-define void @interchange_01(i32 %k, i32 %N) {
-entry:
- %cmp21 = icmp sgt i32 %N, 0
- br i1 %cmp21, label %for.cond1.preheader.lr.ph, label %for.end12
-
-for.cond1.preheader.lr.ph:
- %cmp219 = icmp sgt i32 %N, 1
- %0 = add i32 %N, -1
- br label %for.cond1.preheader
-
-for.cond1.preheader:
- %indvars.iv23 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next24, %for.inc10 ]
- br i1 %cmp219, label %for.body3, label %for.inc10
-
-for.body3:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.cond1.preheader ]
- %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv23
- %1 = load i32, i32* %arrayidx5
- %add = add nsw i32 %1, %k
- store i32 %add, i32* %arrayidx5
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv to i32
- %exitcond = icmp eq i32 %lftr.wideiv, %0
- br i1 %exitcond, label %for.inc10, label %for.body3
-
-for.inc10:
- %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
- %lftr.wideiv25 = trunc i64 %indvars.iv23 to i32
- %exitcond26 = icmp eq i32 %lftr.wideiv25, %0
- br i1 %exitcond26, label %for.end12, label %for.cond1.preheader
-
-for.end12:
- ret void
-}
-
-; CHECK-LABEL: @interchange_01
-; CHECK: entry:
-; CHECK: %cmp21 = icmp sgt i32 %N, 0
-; CHECK: br i1 %cmp21, label %for.body3.preheader, label %for.end12
-; CHECK: for.cond1.preheader.lr.ph:
-; CHECK: br label %for.cond1.preheader
-; CHECK: for.cond1.preheader:
-; CHECK: %indvars.iv23 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next24, %for.inc10 ]
-; CHECK: br i1 %cmp219, label %for.body3.split1, label %for.end12.loopexit
-; CHECK: for.body3.preheader:
-; CHECK: %cmp219 = icmp sgt i32 %N, 1
-; CHECK: %0 = add i32 %N, -1
-; CHECK: br label %for.body3
-; CHECK: for.body3:
-; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 1, %for.body3.preheader ]
-; CHECK: br label %for.cond1.preheader.lr.ph
-; CHECK: for.body3.split1:
-; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv23
-; CHECK: %1 = load i32, i32* %arrayidx5
-; CHECK: %add = add nsw i32 %1, %k
-; CHECK: store i32 %add, i32* %arrayidx5
-; CHECK: br label %for.inc10.loopexit
-; CHECK: for.body3.split:
-; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %lftr.wideiv = trunc i64 %indvars.iv to i32
-; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, %0
-; CHECK: br i1 %exitcond, label %for.end12.loopexit, label %for.body3
-; CHECK: for.inc10.loopexit:
-; CHECK: br label %for.inc10
-; CHECK: for.inc10:
-; CHECK: %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
-; CHECK: %lftr.wideiv25 = trunc i64 %indvars.iv23 to i32
-; CHECK: %exitcond26 = icmp eq i32 %lftr.wideiv25, %0
-; CHECK: br i1 %exitcond26, label %for.body3.split, label %for.cond1.preheader
-; CHECK: for.end12.loopexit:
-; CHECK: br label %for.end12
-; CHECK: for.end12:
-; CHECK: ret void
-
-;;--------------------------------------Test case 02-------------------------------------
-
-;; for(int i=0;i<100;i++)
-;; for(int j=100;j>=0;j--)
-;; A[j][i] = A[j][i]+k;
-
-define void @interchange_02(i32 %k) {
-entry:
- br label %for.cond1.preheader
-
-for.cond1.preheader:
- %indvars.iv19 = phi i64 [ 0, %entry ], [ %indvars.iv.next20, %for.inc10 ]
- br label %for.body3
-
-for.body3:
- %indvars.iv = phi i64 [ 100, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
- %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv19
- %0 = load i32, i32* %arrayidx5
- %add = add nsw i32 %0, %k
- store i32 %add, i32* %arrayidx5
- %indvars.iv.next = add nsw i64 %indvars.iv, -1
- %cmp2 = icmp sgt i64 %indvars.iv, 0
- br i1 %cmp2, label %for.body3, label %for.inc10
-
-for.inc10:
- %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
- %exitcond = icmp eq i64 %indvars.iv.next20, 100
- br i1 %exitcond, label %for.end11, label %for.cond1.preheader
-
-for.end11:
- ret void
-}
-
-; CHECK-LABEL: @interchange_02
-; CHECK: entry:
-; CHECK: br label %for.body3.preheader
-; CHECK: for.cond1.preheader.preheader:
-; CHECK: br label %for.cond1.preheader
-; CHECK: for.cond1.preheader:
-; CHECK: %indvars.iv19 = phi i64 [ %indvars.iv.next20, %for.inc10 ], [ 0, %for.cond1.preheader.preheader ]
-; CHECK: br label %for.body3.split1
-; CHECK: for.body3.preheader:
-; CHECK: br label %for.body3
-; CHECK: for.body3:
-; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3.split ], [ 100, %for.body3.preheader ]
-; CHECK: br label %for.cond1.preheader.preheader
-; CHECK: for.body3.split1: ; preds = %for.cond1.preheader
-; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv19
-; CHECK: %0 = load i32, i32* %arrayidx5
-; CHECK: %add = add nsw i32 %0, %k
-; CHECK: store i32 %add, i32* %arrayidx5
-; CHECK: br label %for.inc10
-; CHECK: for.body3.split:
-; CHECK: %indvars.iv.next = add nsw i64 %indvars.iv, -1
-; CHECK: %cmp2 = icmp sgt i64 %indvars.iv, 0
-; CHECK: br i1 %cmp2, label %for.body3, label %for.end11
-; CHECK: for.inc10:
-; CHECK: %indvars.iv.next20 = add nuw nsw i64 %indvars.iv19, 1
-; CHECK: %exitcond = icmp eq i64 %indvars.iv.next20, 100
-; CHECK: br i1 %exitcond, label %for.body3.split, label %for.cond1.preheader
-; CHECK: for.end11:
-; CHECK: ret void
-
-;;--------------------------------------Test case 03-------------------------------------
-;; Loops should not be interchanged in this case as it is not profitable.
-;; for(int i=0;i<100;i++)
-;; for(int j=0;j<100;j++)
-;; A[i][j] = A[i][j]+k;
-
-define void @interchange_03(i32 %k) {
-entry:
- br label %for.cond1.preheader
-
-for.cond1.preheader:
- %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc10 ]
- br label %for.body3
-
-for.body3:
- %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
- %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv21, i64 %indvars.iv
- %0 = load i32, i32* %arrayidx5
- %add = add nsw i32 %0, %k
- store i32 %add, i32* %arrayidx5
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 100
- br i1 %exitcond, label %for.inc10, label %for.body3
-
-for.inc10:
- %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
- %exitcond23 = icmp eq i64 %indvars.iv.next22, 100
- br i1 %exitcond23, label %for.end12, label %for.cond1.preheader
-
-for.end12:
- ret void
-}
-
-; CHECK-LABEL: @interchange_03
-; CHECK: entry:
-; CHECK: br label %for.cond1.preheader.preheader
-; CHECK: for.cond1.preheader.preheader: ; preds = %entry
-; CHECK: br label %for.cond1.preheader
-; CHECK: for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc10
-; CHECK: %indvars.iv21 = phi i64 [ %indvars.iv.next22, %for.inc10 ], [ 0, %for.cond1.preheader.preheader ]
-; CHECK: br label %for.body3.preheader
-; CHECK: for.body3.preheader: ; preds = %for.cond1.preheader
-; CHECK: br label %for.body3
-; CHECK: for.body3: ; preds = %for.body3.preheader, %for.body3
-; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.body3.preheader ]
-; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv21, i64 %indvars.iv
-; CHECK: %0 = load i32, i32* %arrayidx5
-; CHECK: %add = add nsw i32 %0, %k
-; CHECK: store i32 %add, i32* %arrayidx5
-; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %exitcond = icmp eq i64 %indvars.iv.next, 100
-; CHECK: br i1 %exitcond, label %for.inc10, label %for.body3
-; CHECK: for.inc10: ; preds = %for.body3
-; CHECK: %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
-; CHECK: %exitcond23 = icmp eq i64 %indvars.iv.next22, 100
-; CHECK: br i1 %exitcond23, label %for.end12, label %for.cond1.preheader
-; CHECK: for.end12: ; preds = %for.inc10
-; CHECK: ret void
-
-
-;;--------------------------------------Test case 04-------------------------------------
-;; Loops should not be interchanged in this case as it is not legal due to dependency.
-;; for(int j=0;j<99;j++)
-;; for(int i=0;i<99;i++)
-;; A[j][i+1] = A[j+1][i]+k;
-
-define void @interchange_04(i32 %k){
-entry:
- br label %for.cond1.preheader
-
-for.cond1.preheader:
- %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for.inc12 ]
- %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
- br label %for.body3
-
-for.body3:
- %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
- %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next24, i64 %indvars.iv
- %0 = load i32, i32* %arrayidx5
- %add6 = add nsw i32 %0, %k
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %arrayidx11 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv23, i64 %indvars.iv.next
- store i32 %add6, i32* %arrayidx11
- %exitcond = icmp eq i64 %indvars.iv.next, 99
- br i1 %exitcond, label %for.inc12, label %for.body3
-
-for.inc12:
- %exitcond25 = icmp eq i64 %indvars.iv.next24, 99
- br i1 %exitcond25, label %for.end14, label %for.cond1.preheader
-
-for.end14:
- ret void
-}
-
-; CHECK-LABEL: @interchange_04
-; CHECK: entry:
-; CHECK: br label %for.cond1.preheader
-; CHECK: for.cond1.preheader: ; preds = %for.inc12, %entry
-; CHECK: %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for.inc12 ]
-; CHECK: %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
-; CHECK: br label %for.body3
-; CHECK: for.body3: ; preds = %for.body3, %for.cond1.preheader
-; CHECK: %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
-; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next24, i64 %indvars.iv
-; CHECK: %0 = load i32, i32* %arrayidx5
-; CHECK: %add6 = add nsw i32 %0, %k
-; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %arrayidx11 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv23, i64 %indvars.iv.next
-; CHECK: store i32 %add6, i32* %arrayidx11
-; CHECK: %exitcond = icmp eq i64 %indvars.iv.next, 99
-; CHECK: br i1 %exitcond, label %for.inc12, label %for.body3
-; CHECK: for.inc12: ; preds = %for.body3
-; CHECK: %exitcond25 = icmp eq i64 %indvars.iv.next24, 99
-; CHECK: br i1 %exitcond25, label %for.end14, label %for.cond1.preheader
-; CHECK: for.end14: ; preds = %for.inc12
-; CHECK: ret void
-
-
-
-;;--------------------------------------Test case 05-------------------------------------
-;; Loops not tightly nested are not interchanged
-;; for(int j=0;j<N;j++) {
-;; B[j] = j+k;
-;; for(int i=0;i<N;i++)
-;; A[j][i] = A[j][i]+B[j];
-;; }
-
-define void @interchange_05(i32 %k, i32 %N){
-entry:
- %cmp30 = icmp sgt i32 %N, 0
- br i1 %cmp30, label %for.body.lr.ph, label %for.end17
-
-for.body.lr.ph:
- %0 = add i32 %N, -1
- %1 = zext i32 %k to i64
- br label %for.body
-
-for.body:
- %indvars.iv32 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next33, %for.inc15 ]
- %2 = add nsw i64 %indvars.iv32, %1
- %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @B, i64 0, i64 %indvars.iv32
- %3 = trunc i64 %2 to i32
- store i32 %3, i32* %arrayidx
- br label %for.body3
-
-for.body3:
- %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
- %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv32, i64 %indvars.iv
- %4 = load i32, i32* %arrayidx7
- %add10 = add nsw i32 %3, %4
- store i32 %add10, i32* %arrayidx7
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv to i32
- %exitcond = icmp eq i32 %lftr.wideiv, %0
- br i1 %exitcond, label %for.inc15, label %for.body3
-
-for.inc15:
- %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
- %lftr.wideiv35 = trunc i64 %indvars.iv32 to i32
- %exitcond36 = icmp eq i32 %lftr.wideiv35, %0
- br i1 %exitcond36, label %for.end17, label %for.body
-
-for.end17:
- ret void
-}
-
-; CHECK-LABEL: @interchange_05
-; CHECK: entry:
-; CHECK: %cmp30 = icmp sgt i32 %N, 0
-; CHECK: br i1 %cmp30, label %for.body.lr.ph, label %for.end17
-; CHECK: for.body.lr.ph:
-; CHECK: %0 = add i32 %N, -1
-; CHECK: %1 = zext i32 %k to i64
-; CHECK: br label %for.body
-; CHECK: for.body:
-; CHECK: %indvars.iv32 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next33, %for.inc15 ]
-; CHECK: %2 = add nsw i64 %indvars.iv32, %1
-; CHECK: %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @B, i64 0, i64 %indvars.iv32
-; CHECK: %3 = trunc i64 %2 to i32
-; CHECK: store i32 %3, i32* %arrayidx
-; CHECK: br label %for.body3.preheader
-; CHECK: for.body3.preheader:
-; CHECK: br label %for.body3
-; CHECK: for.body3:
-; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.body3.preheader ]
-; CHECK: %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv32, i64 %indvars.iv
-; CHECK: %4 = load i32, i32* %arrayidx7
-; CHECK: %add10 = add nsw i32 %3, %4
-; CHECK: store i32 %add10, i32* %arrayidx7
-; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %lftr.wideiv = trunc i64 %indvars.iv to i32
-; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, %0
-; CHECK: br i1 %exitcond, label %for.inc15, label %for.body3
-; CHECK: for.inc15:
-; CHECK: %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
-; CHECK: %lftr.wideiv35 = trunc i64 %indvars.iv32 to i32
-; CHECK: %exitcond36 = icmp eq i32 %lftr.wideiv35, %0
-; CHECK: br i1 %exitcond36, label %for.end17.loopexit, label %for.body
-; CHECK: for.end17.loopexit:
-; CHECK: br label %for.end17
-; CHECK: for.end17:
-; CHECK: ret void
-
-
-;;--------------------------------------Test case 06-------------------------------------
-;; Loops not tightly nested are not interchanged
-;; for(int j=0;j<N;j++) {
-;; foo();
-;; for(int i=2;i<N;i++)
-;; A[j][i] = A[j][i]+k;
-;; }
-
-define void @interchange_06(i32 %k, i32 %N) {
-entry:
- %cmp22 = icmp sgt i32 %N, 0
- br i1 %cmp22, label %for.body.lr.ph, label %for.end12
-
-for.body.lr.ph:
- %0 = add i32 %N, -1
- br label %for.body
-
-for.body:
- %indvars.iv24 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next25, %for.inc10 ]
- tail call void (...) @foo()
- br label %for.body3
-
-for.body3:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 2, %for.body ]
- %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv24, i64 %indvars.iv
- %1 = load i32, i32* %arrayidx5
- %add = add nsw i32 %1, %k
- store i32 %add, i32* %arrayidx5
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv to i32
- %exitcond = icmp eq i32 %lftr.wideiv, %0
- br i1 %exitcond, label %for.inc10, label %for.body3
-
-for.inc10:
- %indvars.iv.next25 = add nuw nsw i64 %indvars.iv24, 1
- %lftr.wideiv26 = trunc i64 %indvars.iv24 to i32
- %exitcond27 = icmp eq i32 %lftr.wideiv26, %0
- br i1 %exitcond27, label %for.end12, label %for.body
-
-for.end12:
- ret void
-}
-;; Here we are checking if the inner phi is not split then we have not interchanged.
-; CHECK-LABEL: @interchange_06
-; CHECK: phi i64 [ %indvars.iv.next, %for.body3 ], [ 2, %for.body3.preheader ]
-; CHECK-NEXT: getelementptr
-; CHECK-NEXT: %1 = load
-
-;;--------------------------------------Test case 07-------------------------------------
-;; FIXME:
-;; Test for interchange when we have an lcssa phi. This should ideally be interchanged but it is currently not supported.
-;; for(gi=1;gi<N;gi++)
-;; for(gj=1;gj<M;gj++)
-;; A[gj][gi] = A[gj - 1][gi] + C[gj][gi];
-
-@gi = common global i32 0
-@gj = common global i32 0
-
-define void @interchange_07(i32 %N, i32 %M){
-entry:
- store i32 1, i32* @gi
- %cmp21 = icmp sgt i32 %N, 1
- br i1 %cmp21, label %for.cond1.preheader.lr.ph, label %for.end16
-
-for.cond1.preheader.lr.ph:
- %cmp218 = icmp sgt i32 %M, 1
- %gi.promoted = load i32, i32* @gi
- %0 = add i32 %M, -1
- %1 = sext i32 %gi.promoted to i64
- %2 = sext i32 %N to i64
- %3 = add i32 %gi.promoted, 1
- %4 = icmp slt i32 %3, %N
- %smax = select i1 %4, i32 %N, i32 %3
- br label %for.cond1.preheader
-
-for.cond1.preheader:
- %indvars.iv25 = phi i64 [ %1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next26, %for.inc14 ]
- br i1 %cmp218, label %for.body3, label %for.inc14
-
-for.body3:
- %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.cond1.preheader ]
- %5 = add nsw i64 %indvars.iv, -1
- %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %5, i64 %indvars.iv25
- %6 = load i32, i32* %arrayidx5
- %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %indvars.iv, i64 %indvars.iv25
- %7 = load i32, i32* %arrayidx9
- %add = add nsw i32 %7, %6
- %arrayidx13 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv25
- store i32 %add, i32* %arrayidx13
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv to i32
- %exitcond = icmp eq i32 %lftr.wideiv, %0
- br i1 %exitcond, label %for.inc14, label %for.body3
-
-for.inc14:
- %inc.lcssa23 = phi i32 [ 1, %for.cond1.preheader ], [ %M, %for.body3 ]
- %indvars.iv.next26 = add nsw i64 %indvars.iv25, 1
- %cmp = icmp slt i64 %indvars.iv.next26, %2
- br i1 %cmp, label %for.cond1.preheader, label %for.cond.for.end16_crit_edge
-
-for.cond.for.end16_crit_edge:
- store i32 %inc.lcssa23, i32* @gj
- store i32 %smax, i32* @gi
- br label %for.end16
-
-for.end16:
- ret void
-}
-
-; CHECK-LABEL: @interchange_07
-; CHECK: for.body3: ; preds = %for.body3.preheader, %for.body3
-; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 1, %for.body3.preheader ]
-; CHECK: %5 = add nsw i64 %indvars.iv, -1
-; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %5, i64 %indvars.iv25
-; CHECK: %6 = load i32, i32* %arrayidx5
-; CHECK: %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %indvars.iv, i64 %indvars.iv25
-
-;;------------------------------------------------Test case 08-------------------------------
-;; Test for interchange in loop nest greater than 2.
-;; for(int i=0;i<100;i++)
-;; for(int j=0;j<100;j++)
-;; for(int k=0;k<100;k++)
-;; D[i][k][j] = D[i][k][j]+t;
-
-define void @interchange_08(i32 %t){
-entry:
- br label %for.cond1.preheader
-
-for.cond1.preheader: ; preds = %for.inc15, %entry
- %i.028 = phi i32 [ 0, %entry ], [ %inc16, %for.inc15 ]
- br label %for.cond4.preheader
-
-for.cond4.preheader: ; preds = %for.inc12, %for.cond1.preheader
- %j.027 = phi i32 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
- br label %for.body6
-
-for.body6: ; preds = %for.body6, %for.cond4.preheader
- %k.026 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
- %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %i.028, i32 %k.026, i32 %j.027
- %0 = load i32, i32* %arrayidx8
- %add = add nsw i32 %0, %t
- store i32 %add, i32* %arrayidx8
- %inc = add nuw nsw i32 %k.026, 1
- %exitcond = icmp eq i32 %inc, 100
- br i1 %exitcond, label %for.inc12, label %for.body6
-
-for.inc12: ; preds = %for.body6
- %inc13 = add nuw nsw i32 %j.027, 1
- %exitcond29 = icmp eq i32 %inc13, 100
- br i1 %exitcond29, label %for.inc15, label %for.cond4.preheader
-
-for.inc15: ; preds = %for.inc12
- %inc16 = add nuw nsw i32 %i.028, 1
- %exitcond30 = icmp eq i32 %inc16, 100
- br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
-
-for.end17: ; preds = %for.inc15
- ret void
-}
-; CHECK-LABEL: @interchange_08
-; CHECK: entry:
-; CHECK: br label %for.cond1.preheader.preheader
-; CHECK: for.cond1.preheader.preheader: ; preds = %entry
-; CHECK: br label %for.cond1.preheader
-; CHECK: for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc15
-; CHECK: %i.028 = phi i32 [ %inc16, %for.inc15 ], [ 0, %for.cond1.preheader.preheader ]
-; CHECK: br label %for.body6.preheader
-; CHECK: for.cond4.preheader.preheader: ; preds = %for.body6
-; CHECK: br label %for.cond4.preheader
-; CHECK: for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc12
-; CHECK: %j.027 = phi i32 [ %inc13, %for.inc12 ], [ 0, %for.cond4.preheader.preheader ]
-; CHECK: br label %for.body6.split1
-; CHECK: for.body6.preheader: ; preds = %for.cond1.preheader
-; CHECK: br label %for.body6
-; CHECK: for.body6: ; preds = %for.body6.preheader, %for.body6.split
-; CHECK: %k.026 = phi i32 [ %inc, %for.body6.split ], [ 0, %for.body6.preheader ]
-; CHECK: br label %for.cond4.preheader.preheader
-; CHECK: for.body6.split1: ; preds = %for.cond4.preheader
-; CHECK: %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %i.028, i32 %k.026, i32 %j.027
-; CHECK: %0 = load i32, i32* %arrayidx8
-; CHECK: %add = add nsw i32 %0, %t
-; CHECK: store i32 %add, i32* %arrayidx8
-; CHECK: br label %for.inc12
-; CHECK: for.body6.split: ; preds = %for.inc12
-; CHECK: %inc = add nuw nsw i32 %k.026, 1
-; CHECK: %exitcond = icmp eq i32 %inc, 100
-; CHECK: br i1 %exitcond, label %for.inc15, label %for.body6
-; CHECK: for.inc12: ; preds = %for.body6.split1
-; CHECK: %inc13 = add nuw nsw i32 %j.027, 1
-; CHECK: %exitcond29 = icmp eq i32 %inc13, 100
-; CHECK: br i1 %exitcond29, label %for.body6.split, label %for.cond4.preheader
-; CHECK: for.inc15: ; preds = %for.body6.split
-; CHECK: %inc16 = add nuw nsw i32 %i.028, 1
-; CHECK: %exitcond30 = icmp eq i32 %inc16, 100
-; CHECK: br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
-; CHECK: for.end17: ; preds = %for.inc15
-; CHECK: ret void
-
-;;-----------------------------------Test case 09-------------------------------
-;; Test that a flow dependency in outer loop doesn't prevent interchange in
-;; loops i and j.
-;;
-;; for (int k = 0; k < 100; ++k) {
-;; T[k] = fn1();
-;; for (int i = 0; i < 1000; ++i)
-;; for(int j = 1; j < 1000; ++j)
-;; Arr[j][i] = Arr[j][i]+k;
-;; fn2(T[k]);
-;; }
-
-@T = internal global [100 x double] zeroinitializer, align 4
-@Arr = internal global [1000 x [1000 x i32]] zeroinitializer, align 4
-
-define void @interchange_09(i32 %k) {
-entry:
- br label %for.body
-
-for.cond.cleanup: ; preds = %for.cond.cleanup4
- ret void
-
-for.body: ; preds = %for.cond.cleanup4, %entry
- %indvars.iv45 = phi i64 [ 0, %entry ], [ %indvars.iv.next46, %for.cond.cleanup4 ]
- %call = call double @fn1()
- %arrayidx = getelementptr inbounds [100 x double], [100 x double]* @T, i64 0, i64 %indvars.iv45
- store double %call, double* %arrayidx, align 8
- br label %for.cond6.preheader
-
-for.cond6.preheader: ; preds = %for.cond.cleanup8, %for.body
- %indvars.iv42 = phi i64 [ 0, %for.body ], [ %indvars.iv.next43, %for.cond.cleanup8 ]
- br label %for.body9
-
-for.cond.cleanup4: ; preds = %for.cond.cleanup8
- %tmp = load double, double* %arrayidx, align 8
- call void @fn2(double %tmp)
- %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1
- %exitcond47 = icmp ne i64 %indvars.iv.next46, 100
- br i1 %exitcond47, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup8: ; preds = %for.body9
- %indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1
- %exitcond44 = icmp ne i64 %indvars.iv.next43, 1000
- br i1 %exitcond44, label %for.cond6.preheader, label %for.cond.cleanup4
-
-for.body9: ; preds = %for.body9, %for.cond6.preheader
- %indvars.iv = phi i64 [ 1, %for.cond6.preheader ], [ %indvars.iv.next, %for.body9 ]
- %arrayidx13 = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv42
- %tmp1 = load i32, i32* %arrayidx13, align 4
- %tmp2 = trunc i64 %indvars.iv45 to i32
- %add = add nsw i32 %tmp1, %tmp2
- store i32 %add, i32* %arrayidx13, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp ne i64 %indvars.iv.next, 1000
- br i1 %exitcond, label %for.body9, label %for.cond.cleanup8
-}
-
-declare double @fn1()
-declare void @fn2(double)
-
-
-
-
-
-;; After interchange %indvars.iv (j) should increment as the middle loop.
-;; After interchange %indvars.iv42 (i) should increment with the inner most loop.
-
-; CHECK-LABEL: @interchange_09
-
-; CHECK: for.body:
-; CHECK: %indvars.iv45 = phi i64 [ %indvars.iv.next46, %for.cond.cleanup4 ], [ 0, %for.body.preheader ]
-; CHECK: %call = call double @fn1()
-; CHECK: %arrayidx = getelementptr inbounds [100 x double], [100 x double]* @T, i64 0, i64 %indvars.iv45
-; CHECK: store double %call, double* %arrayidx, align 8
-; CHECK: br label %for.body9.preheader
-
-; CHECK: for.cond6.preheader.preheader:
-; CHECK: br label %for.cond6.preheader
-
-; CHECK: for.cond6.preheader:
-; CHECK: %indvars.iv42 = phi i64 [ %indvars.iv.next43, %for.cond.cleanup8 ], [ 0, %for.cond6.preheader.preheader ]
-; CHECK: br label %for.body9.split1
-
-; CHECK: for.body9.preheader:
-; CHECK: br label %for.body9
-
-; CHECK: for.cond.cleanup4:
-; CHECK: %tmp = load double, double* %arrayidx, align 8
-; CHECK: call void @fn2(double %tmp)
-; CHECK: %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1
-; CHECK: %exitcond47 = icmp ne i64 %indvars.iv.next46, 100
-; CHECK: br i1 %exitcond47, label %for.body, label %for.cond.cleanup
-
-; CHECK: for.cond.cleanup8:
-; CHECK: %indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1
-; CHECK: %exitcond44 = icmp ne i64 %indvars.iv.next43, 1000
-; CHECK: br i1 %exitcond44, label %for.cond6.preheader, label %for.body9.split
-
-; CHECK: for.body9:
-; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body9.split ], [ 1, %for.body9.preheader ]
-; CHECK: br label %for.cond6.preheader.preheader
-
-; CHECK: for.body9.split1:
-; CHECK: %arrayidx13 = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv42
-; CHECK: store i32 %add, i32* %arrayidx13, align 4
-; CHECK: br label %for.cond.cleanup8
-
-; CHECK: for.body9.split:
-; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, 1000
-; CHECK: br i1 %exitcond, label %for.body9, label %for.cond.cleanup4
-
-
-;;-----------------------------------Test case 10-------------------------------
-;; Test to make sure we can handle output dependencies.
-;;
-;; for (int i = 0; i < 2; ++i)
-;; for(int j = 0; j < 3; ++j) {
-;; A[j][i] = i;
-;; A[j][i+1] = j;
-;; }
-
-@A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16
-
-define void @interchange_10() {
-entry:
- br label %for.cond1.preheader
-
-for.cond.loopexit: ; preds = %for.body4
- %exitcond28 = icmp ne i64 %indvars.iv.next27, 2
- br i1 %exitcond28, label %for.cond1.preheader, label %for.cond.cleanup
-
-for.cond1.preheader: ; preds = %for.cond.loopexit, %entry
- %indvars.iv26 = phi i64 [ 0, %entry ], [ %indvars.iv.next27, %for.cond.loopexit ]
- %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
- br label %for.body4
-
-for.cond.cleanup: ; preds = %for.cond.loopexit
- ret void
-
-for.body4: ; preds = %for.body4, %for.cond1.preheader
- %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
- %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
- %tmp = trunc i64 %indvars.iv26 to i32
- store i32 %tmp, i32* %arrayidx6, align 4
- %arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
- %tmp1 = trunc i64 %indvars.iv to i32
- store i32 %tmp1, i32* %arrayidx10, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp ne i64 %indvars.iv.next, 3
- br i1 %exitcond, label %for.body4, label %for.cond.loopexit
-}
-
-; CHECK-LABEL: @interchange_10
-; CHECK: entry:
-; CHECK: br label %for.body4.preheader
-
-; CHECK: for.cond1.preheader.preheader:
-; CHECK: br label %for.cond1.preheader
-
-; CHECK: for.cond.loopexit:
-; CHECK: %exitcond28 = icmp ne i64 %indvars.iv.next27, 2
-; CHECK: br i1 %exitcond28, label %for.cond1.preheader, label %for.body4.split
-
-; CHECK: for.cond1.preheader:
-; CHECK: %indvars.iv26 = phi i64 [ %indvars.iv.next27, %for.cond.loopexit ], [ 0, %for.cond1.preheader.preheader ]
-; CHECK: %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
-; CHECK: br label %for.body4.split1
-
-; CHECK: for.body4.preheader:
-; CHECK: br label %for.body4
-
-; CHECK: for.cond.cleanup:
-; CHECK: ret void
-
-; CHECK: for.body4:
-; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4.split ], [ 0, %for.body4.preheader ]
-; CHECK: br label %for.cond1.preheader.preheader
-
-; CHECK: for.body4.split1:
-; CHECK: %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
-; CHECK: %tmp = trunc i64 %indvars.iv26 to i32
-; CHECK: store i32 %tmp, i32* %arrayidx6, align 4
-; CHECK: %arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
-; CHECK: %tmp1 = trunc i64 %indvars.iv to i32
-; CHECK: store i32 %tmp1, i32* %arrayidx10, align 4
-; CHECK: br label %for.cond.loopexit
-
-; CHECK: for.body4.split:
-; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, 3
-; CHECK: br i1 %exitcond, label %for.body4, label %for.cond.cleanup
diff --git a/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll b/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
new file mode 100644
index 0000000000000..e14598cfdd609
--- /dev/null
+++ b/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
@@ -0,0 +1,220 @@
+; Test optimization remarks generated by the LoopInterchange pass.
+;
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
+; RUN: -pass-remarks='loop-interchange' -S
+; RUN: cat %t | FileCheck %s
+
+@A = common global [100 x [100 x i32]] zeroinitializer
+@B = common global [100 x [100 x i32]] zeroinitializer
+@C = common global [100 x i32] zeroinitializer
+
+;;---------------------------------------Test case 01---------------------------------
+;; Loops interchange is not profitable.
+;; for(int i=1;i<N;i++)
+;; for(int j=1;j<N;j++)
+;; A[i-1][j-1] = A[i - 1][j-1] + B[i][j];
+
+define void @test01(i32 %N){
+entry:
+ %cmp31 = icmp sgt i32 %N, 1
+ br i1 %cmp31, label %for.cond1.preheader.lr.ph, label %for.end19
+
+for.cond1.preheader.lr.ph:
+ %0 = add i32 %N, -1
+ br label %for.body3.lr.ph
+
+for.body3.lr.ph:
+ %indvars.iv34 = phi i64 [ 1, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next35, %for.inc17 ]
+ %1 = add nsw i64 %indvars.iv34, -1
+ br label %for.body3
+
+for.body3:
+ %indvars.iv = phi i64 [ 1, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+ %2 = add nsw i64 %indvars.iv, -1
+ %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %1, i64 %2
+ %3 = load i32, i32* %arrayidx6
+ %arrayidx10 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @B, i64 0, i64 %indvars.iv34, i64 %indvars.iv
+ %4 = load i32, i32* %arrayidx10
+ %add = add nsw i32 %4, %3
+ store i32 %add, i32* %arrayidx6
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %0
+ br i1 %exitcond, label %for.inc17, label %for.body3
+
+for.inc17:
+ %indvars.iv.next35 = add nuw nsw i64 %indvars.iv34, 1
+ %lftr.wideiv37 = trunc i64 %indvars.iv34 to i32
+ %exitcond38 = icmp eq i32 %lftr.wideiv37, %0
+ br i1 %exitcond38, label %for.end19, label %for.body3.lr.ph
+
+for.end19:
+ ret void
+}
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: InterchangeNotProfitable
+; CHECK-NEXT: Function: test01
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - String: 'Interchanging loops is too costly (cost='
+; CHECK-NEXT: - Cost: '2'
+; CHECK-NEXT: - String: ', threshold='
+; CHECK-NEXT: - Threshold: '0'
+; CHECK-NEXT: - String: ') and it does not improve parallelism.'
+; CHECK-NEXT: ...
+
+;;--------------------------------------Test case 02------------------------------------
+;; [FIXME] This loop though valid is currently not interchanged due to the
+;; limitation that we cannot split the inner loop latch due to multiple use of inner induction
+;; variable.(used to increment the loop counter and to access A[j+1][i+1]
+;; for(int i=0;i<N-1;i++)
+;; for(int j=1;j<N-1;j++)
+;; A[j+1][i+1] = A[j+1][i+1] + k;
+
+define void @test02(i32 %k, i32 %N) {
+ entry:
+ %sub = add nsw i32 %N, -1
+ %cmp26 = icmp sgt i32 %N, 1
+ br i1 %cmp26, label %for.cond1.preheader.lr.ph, label %for.end17
+
+ for.cond1.preheader.lr.ph:
+ %cmp324 = icmp sgt i32 %sub, 1
+ %0 = add i32 %N, -2
+ %1 = sext i32 %sub to i64
+ br label %for.cond1.preheader
+
+ for.cond.loopexit:
+ %cmp = icmp slt i64 %indvars.iv.next29, %1
+ br i1 %cmp, label %for.cond1.preheader, label %for.end17
+
+ for.cond1.preheader:
+ %indvars.iv28 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next29, %for.cond.loopexit ]
+ %indvars.iv.next29 = add nuw nsw i64 %indvars.iv28, 1
+ br i1 %cmp324, label %for.body4, label %for.cond.loopexit
+
+ for.body4:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4 ], [ 1, %for.cond1.preheader ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next, i64 %indvars.iv.next29
+ %2 = load i32, i32* %arrayidx7
+ %add8 = add nsw i32 %2, %k
+ store i32 %add8, i32* %arrayidx7
+ %lftr.wideiv = trunc i64 %indvars.iv to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %0
+ br i1 %exitcond, label %for.cond.loopexit, label %for.body4
+
+ for.end17:
+ ret void
+}
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: UnsupportedInsBetweenInduction
+; CHECK-NEXT: Function: test02
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - String: Found unsupported instruction between induction variable increment and branch.
+; CHECK-NEXT: ...
+
+;;-----------------------------------Test case 03-------------------------------
+;; Test to make sure we can handle output dependencies.
+;;
+;; for (int i = 0; i < 2; ++i)
+;; for(int j = 0; j < 3; ++j) {
+;; A[j][i] = i;
+;; A[j][i+1] = j;
+;; }
+
+@A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16
+
+define void @test03() {
+entry:
+ br label %for.cond1.preheader
+
+for.cond.loopexit: ; preds = %for.body4
+ %exitcond28 = icmp ne i64 %indvars.iv.next27, 2
+ br i1 %exitcond28, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.cond1.preheader: ; preds = %for.cond.loopexit, %entry
+ %indvars.iv26 = phi i64 [ 0, %entry ], [ %indvars.iv.next27, %for.cond.loopexit ]
+ %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
+ br label %for.body4
+
+for.cond.cleanup: ; preds = %for.cond.loopexit
+ ret void
+
+for.body4: ; preds = %for.body4, %for.cond1.preheader
+ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
+ %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
+ %tmp = trunc i64 %indvars.iv26 to i32
+ store i32 %tmp, i32* %arrayidx6, align 4
+ %arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
+ %tmp1 = trunc i64 %indvars.iv to i32
+ store i32 %tmp1, i32* %arrayidx10, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, 3
+ br i1 %exitcond, label %for.body4, label %for.cond.loopexit
+}
+
+; CHECK: --- !Passed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: Interchanged
+; CHECK-NEXT: Function: test03
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - String: Loop interchanged with enclosing loop.
+; CHECK-NEXT: ...
+
+;;--------------------------------------Test case 04-------------------------------------
+;; Loops not tightly nested are not interchanged
+;; for(int j=0;j<N;j++) {
+;; B[j] = j+k;
+;; for(int i=0;i<N;i++)
+;; A[j][i] = A[j][i]+B[j];
+;; }
+
+define void @test04(i32 %k, i32 %N){
+entry:
+ %cmp30 = icmp sgt i32 %N, 0
+ br i1 %cmp30, label %for.body.lr.ph, label %for.end17
+
+for.body.lr.ph:
+ %0 = add i32 %N, -1
+ %1 = zext i32 %k to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv32 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next33, %for.inc15 ]
+ %2 = add nsw i64 %indvars.iv32, %1
+ %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @C, i64 0, i64 %indvars.iv32
+ %3 = trunc i64 %2 to i32
+ store i32 %3, i32* %arrayidx
+ br label %for.body3
+
+for.body3:
+ %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv32, i64 %indvars.iv
+ %4 = load i32, i32* %arrayidx7
+ %add10 = add nsw i32 %3, %4
+ store i32 %add10, i32* %arrayidx7
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %0
+ br i1 %exitcond, label %for.inc15, label %for.body3
+
+for.inc15:
+ %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
+ %lftr.wideiv35 = trunc i64 %indvars.iv32 to i32
+ %exitcond36 = icmp eq i32 %lftr.wideiv35, %0
+ br i1 %exitcond36, label %for.end17, label %for.body
+
+for.end17:
+ ret void
+}
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass: loop-interchange
+; CHECK-NEXT: Name: NotTightlyNested
+; CHECK-NEXT: Function: test04
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - String: Cannot interchange loops because they are not tightly nested.
+; CHECK-NEXT: ...
diff --git a/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll b/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll
new file mode 100644
index 0000000000000..cf4f83baea82b
--- /dev/null
+++ b/test/Transforms/LoopInterchange/not-interchanged-dependencies-1.ll
@@ -0,0 +1,64 @@
+; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
+;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [100 x [100 x i32]] zeroinitializer
+@B = common global [100 x i32] zeroinitializer
+
+;; Loops should not be interchanged in this case as it is not legal due to dependency.
+;; for(int j=0;j<99;j++)
+;; for(int i=0;i<99;i++)
+;; A[j][i+1] = A[j+1][i]+k;
+
+define void @interchange_04(i32 %k){
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader:
+ %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for.inc12 ]
+ %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+ br label %for.body3
+
+for.body3:
+ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next24, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx5
+ %add6 = add nsw i32 %0, %k
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx11 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv23, i64 %indvars.iv.next
+ store i32 %add6, i32* %arrayidx11
+ %exitcond = icmp eq i64 %indvars.iv.next, 99
+ br i1 %exitcond, label %for.inc12, label %for.body3
+
+for.inc12:
+ %exitcond25 = icmp eq i64 %indvars.iv.next24, 99
+ br i1 %exitcond25, label %for.end14, label %for.cond1.preheader
+
+for.end14:
+ ret void
+}
+
+; CHECK-LABEL: @interchange_04
+; CHECK: entry:
+; CHECK: br label %for.cond1.preheader
+; CHECK: for.cond1.preheader: ; preds = %for.inc12, %entry
+; CHECK: %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for.inc12 ]
+; CHECK: %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+; CHECK: br label %for.body3
+; CHECK: for.body3: ; preds = %for.body3, %for.cond1.preheader
+; CHECK: %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+; CHECK: %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv.next24, i64 %indvars.iv
+; CHECK: %0 = load i32, i32* %arrayidx5
+; CHECK: %add6 = add nsw i32 %0, %k
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %arrayidx11 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv23, i64 %indvars.iv.next
+; CHECK: store i32 %add6, i32* %arrayidx11
+; CHECK: %exitcond = icmp eq i64 %indvars.iv.next, 99
+; CHECK: br i1 %exitcond, label %for.inc12, label %for.body3
+; CHECK: for.inc12: ; preds = %for.body3
+; CHECK: %exitcond25 = icmp eq i64 %indvars.iv.next24, 99
+; CHECK: br i1 %exitcond25, label %for.end14, label %for.cond1.preheader
+; CHECK: for.end14: ; preds = %for.inc12
+; CHECK: ret void
diff --git a/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll b/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll
new file mode 100644
index 0000000000000..1d4d22883a4f8
--- /dev/null
+++ b/test/Transforms/LoopInterchange/not-interchanged-loop-nest-3.ll
@@ -0,0 +1,87 @@
+; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
+;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@D = common global [100 x [100 x [100 x i32]]] zeroinitializer
+
+;; Test for interchange in loop nest greater than 2.
+;; for(int i=0;i<100;i++)
+;; for(int j=0;j<100;j++)
+;; for(int k=0;k<100;k++)
+;; D[i][k][j] = D[i][k][j]+t;
+
+define void @interchange_08(i32 %t){
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc15, %entry
+ %i.028 = phi i32 [ 0, %entry ], [ %inc16, %for.inc15 ]
+ br label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.inc12, %for.cond1.preheader
+ %j.027 = phi i32 [ 0, %for.cond1.preheader ], [ %inc13, %for.inc12 ]
+ br label %for.body6
+
+for.body6: ; preds = %for.body6, %for.cond4.preheader
+ %k.026 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
+ %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %i.028, i32 %k.026, i32 %j.027
+ %0 = load i32, i32* %arrayidx8
+ %add = add nsw i32 %0, %t
+ store i32 %add, i32* %arrayidx8
+ %inc = add nuw nsw i32 %k.026, 1
+ %exitcond = icmp eq i32 %inc, 100
+ br i1 %exitcond, label %for.inc12, label %for.body6
+
+for.inc12: ; preds = %for.body6
+ %inc13 = add nuw nsw i32 %j.027, 1
+ %exitcond29 = icmp eq i32 %inc13, 100
+ br i1 %exitcond29, label %for.inc15, label %for.cond4.preheader
+
+for.inc15: ; preds = %for.inc12
+ %inc16 = add nuw nsw i32 %i.028, 1
+ %exitcond30 = icmp eq i32 %inc16, 100
+ br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
+
+for.end17: ; preds = %for.inc15
+ ret void
+}
+; CHECK-LABEL: @interchange_08
+; CHECK: entry:
+; CHECK: br label %for.cond1.preheader.preheader
+; CHECK: for.cond1.preheader.preheader: ; preds = %entry
+; CHECK: br label %for.cond1.preheader
+; CHECK: for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc15
+; CHECK: %i.028 = phi i32 [ %inc16, %for.inc15 ], [ 0, %for.cond1.preheader.preheader ]
+; CHECK: br label %for.body6.preheader
+; CHECK: for.cond4.preheader.preheader: ; preds = %for.body6
+; CHECK: br label %for.cond4.preheader
+; CHECK: for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc12
+; CHECK: %j.027 = phi i32 [ %inc13, %for.inc12 ], [ 0, %for.cond4.preheader.preheader ]
+; CHECK: br label %for.body6.split1
+; CHECK: for.body6.preheader: ; preds = %for.cond1.preheader
+; CHECK: br label %for.body6
+; CHECK: for.body6: ; preds = %for.body6.preheader, %for.body6.split
+; CHECK: %k.026 = phi i32 [ %inc, %for.body6.split ], [ 0, %for.body6.preheader ]
+; CHECK: br label %for.cond4.preheader.preheader
+; CHECK: for.body6.split1: ; preds = %for.cond4.preheader
+; CHECK: %arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* @D, i32 0, i32 %i.028, i32 %k.026, i32 %j.027
+; CHECK: %0 = load i32, i32* %arrayidx8
+; CHECK: %add = add nsw i32 %0, %t
+; CHECK: store i32 %add, i32* %arrayidx8
+; CHECK: br label %for.inc12
+; CHECK: for.body6.split: ; preds = %for.inc12
+; CHECK: %inc = add nuw nsw i32 %k.026, 1
+; CHECK: %exitcond = icmp eq i32 %inc, 100
+; CHECK: br i1 %exitcond, label %for.inc15, label %for.body6
+; CHECK: for.inc12: ; preds = %for.body6.split1
+; CHECK: %inc13 = add nuw nsw i32 %j.027, 1
+; CHECK: %exitcond29 = icmp eq i32 %inc13, 100
+; CHECK: br i1 %exitcond29, label %for.body6.split, label %for.cond4.preheader
+; CHECK: for.inc15: ; preds = %for.body6.split
+; CHECK: %inc16 = add nuw nsw i32 %i.028, 1
+; CHECK: %exitcond30 = icmp eq i32 %inc16, 100
+; CHECK: br i1 %exitcond30, label %for.end17, label %for.cond1.preheader
+; CHECK: for.end17: ; preds = %for.inc15
+; CHECK: ret void
diff --git a/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll b/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll
new file mode 100644
index 0000000000000..0cf91b09e65db
--- /dev/null
+++ b/test/Transforms/LoopInterchange/not-interchanged-tightly-nested.ll
@@ -0,0 +1,143 @@
+; RUN: opt < %s -basicaa -loop-interchange -S | FileCheck %s
+;; We test the complete .ll for adjustment in outer loop header/latch and inner loop header/latch.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [100 x [100 x i32]] zeroinitializer
+@B = common global [100 x i32] zeroinitializer
+@C = common global [100 x [100 x i32]] zeroinitializer
+@D = common global [100 x [100 x [100 x i32]]] zeroinitializer
+
+;; Loops not tightly nested are not interchanged
+;; for(int j=0;j<N;j++) {
+;; B[j] = j+k;
+;; for(int i=0;i<N;i++)
+;; A[j][i] = A[j][i]+B[j];
+;; }
+
+define void @interchange_05(i32 %k, i32 %N){
+entry:
+ %cmp30 = icmp sgt i32 %N, 0
+ br i1 %cmp30, label %for.body.lr.ph, label %for.end17
+
+for.body.lr.ph:
+ %0 = add i32 %N, -1
+ %1 = zext i32 %k to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv32 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next33, %for.inc15 ]
+ %2 = add nsw i64 %indvars.iv32, %1
+ %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @B, i64 0, i64 %indvars.iv32
+ %3 = trunc i64 %2 to i32
+ store i32 %3, i32* %arrayidx
+ br label %for.body3
+
+for.body3:
+ %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv32, i64 %indvars.iv
+ %4 = load i32, i32* %arrayidx7
+ %add10 = add nsw i32 %3, %4
+ store i32 %add10, i32* %arrayidx7
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %0
+ br i1 %exitcond, label %for.inc15, label %for.body3
+
+for.inc15:
+ %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
+ %lftr.wideiv35 = trunc i64 %indvars.iv32 to i32
+ %exitcond36 = icmp eq i32 %lftr.wideiv35, %0
+ br i1 %exitcond36, label %for.end17, label %for.body
+
+for.end17:
+ ret void
+}
+
+; CHECK-LABEL: @interchange_05
+; CHECK: entry:
+; CHECK: %cmp30 = icmp sgt i32 %N, 0
+; CHECK: br i1 %cmp30, label %for.body.lr.ph, label %for.end17
+; CHECK: for.body.lr.ph:
+; CHECK: %0 = add i32 %N, -1
+; CHECK: %1 = zext i32 %k to i64
+; CHECK: br label %for.body
+; CHECK: for.body:
+; CHECK: %indvars.iv32 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next33, %for.inc15 ]
+; CHECK: %2 = add nsw i64 %indvars.iv32, %1
+; CHECK: %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @B, i64 0, i64 %indvars.iv32
+; CHECK: %3 = trunc i64 %2 to i32
+; CHECK: store i32 %3, i32* %arrayidx
+; CHECK: br label %for.body3.preheader
+; CHECK: for.body3.preheader:
+; CHECK: br label %for.body3
+; CHECK: for.body3:
+; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.body3.preheader ]
+; CHECK: %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv32, i64 %indvars.iv
+; CHECK: %4 = load i32, i32* %arrayidx7
+; CHECK: %add10 = add nsw i32 %3, %4
+; CHECK: store i32 %add10, i32* %arrayidx7
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %lftr.wideiv = trunc i64 %indvars.iv to i32
+; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, %0
+; CHECK: br i1 %exitcond, label %for.inc15, label %for.body3
+; CHECK: for.inc15:
+; CHECK: %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
+; CHECK: %lftr.wideiv35 = trunc i64 %indvars.iv32 to i32
+; CHECK: %exitcond36 = icmp eq i32 %lftr.wideiv35, %0
+; CHECK: br i1 %exitcond36, label %for.end17.loopexit, label %for.body
+; CHECK: for.end17.loopexit:
+; CHECK: br label %for.end17
+; CHECK: for.end17:
+; CHECK: ret void
+
+
+declare void @foo(...)
+
+;; Loops not tightly nested are not interchanged
+;; for(int j=0;j<N;j++) {
+;; foo();
+;; for(int i=2;i<N;i++)
+;; A[j][i] = A[j][i]+k;
+;; }
+
+define void @interchange_06(i32 %k, i32 %N) {
+entry:
+ %cmp22 = icmp sgt i32 %N, 0
+ br i1 %cmp22, label %for.body.lr.ph, label %for.end12
+
+for.body.lr.ph:
+ %0 = add i32 %N, -1
+ br label %for.body
+
+for.body:
+ %indvars.iv24 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next25, %for.inc10 ]
+ tail call void (...) @foo()
+ br label %for.body3
+
+for.body3:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 2, %for.body ]
+ %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %indvars.iv24, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx5
+ %add = add nsw i32 %1, %k
+ store i32 %add, i32* %arrayidx5
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %0
+ br i1 %exitcond, label %for.inc10, label %for.body3
+
+for.inc10:
+ %indvars.iv.next25 = add nuw nsw i64 %indvars.iv24, 1
+ %lftr.wideiv26 = trunc i64 %indvars.iv24 to i32
+ %exitcond27 = icmp eq i32 %lftr.wideiv26, %0
+ br i1 %exitcond27, label %for.end12, label %for.body
+
+for.end12:
+ ret void
+}
+;; Here we are checking if the inner phi is not split then we have not interchanged.
+; CHECK-LABEL: @interchange_06
+; CHECK: phi i64 [ %indvars.iv.next, %for.body3 ], [ 2, %for.body3.preheader ]
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: %1 = load
diff --git a/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll b/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
new file mode 100644
index 0000000000000..6014775028ee5
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll
@@ -0,0 +1,126 @@
+; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false -unroll-runtime-multi-exit=true -unroll-count=4 -verify-dom-info -S | FileCheck %s
+
+; REQUIRES: asserts
+; The tests below are for verifying dom tree after runtime unrolling
+; with multiple exit/exiting blocks.
+
+; We explicitly set the unroll count so that expensiveTripCount computation is allowed.
+
+; mergedexit block has edges from loop exit blocks.
+define i64 @test1() {
+; CHECK-LABEL: test1(
+; CHECK-LABEL: headerexit:
+; CHECK-NEXT: %addphi = phi i64 [ %add.iv, %header ], [ %add.iv.1, %header.1 ], [ %add.iv.2, %header.2 ], [ %add.iv.3, %header.3 ]
+; CHECK-NEXT: br label %mergedexit
+; CHECK-LABEL: latchexit:
+; CHECK-NEXT: %shftphi = phi i64 [ %shft, %latch ], [ %shft.1, %latch.1 ], [ %shft.2, %latch.2 ], [ %shft.3, %latch.3 ]
+; CHECK-NEXT: br label %mergedexit
+; CHECK-LABEL: mergedexit:
+; CHECK-NEXT: %retval = phi i64 [ %addphi, %headerexit ], [ %shftphi, %latchexit ]
+; CHECK-NEXT: ret i64 %retval
+entry:
+ br label %preheader
+
+preheader: ; preds = %bb
+ %trip = zext i32 undef to i64
+ br label %header
+
+header: ; preds = %latch, %preheader
+ %iv = phi i64 [ 2, %preheader ], [ %add.iv, %latch ]
+ %add.iv = add nuw nsw i64 %iv, 2
+ %cmp1 = icmp ult i64 %add.iv, %trip
+ br i1 %cmp1, label %latch, label %headerexit
+
+latch: ; preds = %header
+ %shft = ashr i64 %add.iv, 1
+ %cmp2 = icmp ult i64 %shft, %trip
+ br i1 %cmp2, label %header, label %latchexit
+
+headerexit: ; preds = %header
+ %addphi = phi i64 [ %add.iv, %header ]
+ br label %mergedexit
+
+latchexit: ; preds = %latch
+ %shftphi = phi i64 [ %shft, %latch ]
+ br label %mergedexit
+
+mergedexit: ; preds = %latchexit, %headerexit
+ %retval = phi i64 [ %addphi, %headerexit ], [ %shftphi, %latchexit ]
+ ret i64 %retval
+}
+
+; mergedexit has edges from loop exit blocks and a block outside the loop.
+define void @test2(i1 %cond, i32 %n) {
+; CHECK-LABEL: header.1:
+; CHECK-NEXT: %add.iv.1 = add nuw nsw i64 %add.iv, 2
+; CHECK: br i1 %cmp1.1, label %latch.1, label %headerexit
+; CHECK-LABEL: latch.3:
+; CHECK: %cmp2.3 = icmp ult i64 %shft.3, %trip
+; CHECK-NEXT: br i1 %cmp2.3, label %header, label %latchexit, !llvm.loop
+entry:
+ br i1 %cond, label %preheader, label %mergedexit
+
+preheader: ; preds = %entry
+ %trip = zext i32 %n to i64
+ br label %header
+
+header: ; preds = %latch, %preheader
+ %iv = phi i64 [ 2, %preheader ], [ %add.iv, %latch ]
+ %add.iv = add nuw nsw i64 %iv, 2
+ %cmp1 = icmp ult i64 %add.iv, %trip
+ br i1 %cmp1, label %latch, label %headerexit
+
+latch: ; preds = %header
+ %shft = ashr i64 %add.iv, 1
+ %cmp2 = icmp ult i64 %shft, %trip
+ br i1 %cmp2, label %header, label %latchexit
+
+headerexit: ; preds = %header
+ br label %mergedexit
+
+latchexit: ; preds = %latch
+ br label %mergedexit
+
+mergedexit: ; preds = %latchexit, %headerexit, %entry
+ ret void
+}
+
+
+; exitsucc is from loop exit block only.
+define i64 @test3(i32 %n) {
+; CHECK-LABEL: test3(
+; CHECK-LABEL: headerexit:
+; CHECK-NEXT: br label %exitsucc
+; CHECK-LABEL: latchexit:
+; CHECK-NEXT: %shftphi = phi i64 [ %shft, %latch ], [ %shft.1, %latch.1 ], [ %shft.2, %latch.2 ], [ %shft.3, %latch.3 ]
+; CHECK-NEXT: ret i64 %shftphi
+; CHECK-LABEL: exitsucc:
+; CHECK-NEXT: ret i64 96
+entry:
+ br label %preheader
+
+preheader: ; preds = %bb
+ %trip = zext i32 %n to i64
+ br label %header
+
+header: ; preds = %latch, %preheader
+ %iv = phi i64 [ 2, %preheader ], [ %add.iv, %latch ]
+ %add.iv = add nuw nsw i64 %iv, 2
+ %cmp1 = icmp ult i64 %add.iv, %trip
+ br i1 %cmp1, label %latch, label %headerexit
+
+latch: ; preds = %header
+ %shft = ashr i64 %add.iv, 1
+ %cmp2 = icmp ult i64 %shft, %trip
+ br i1 %cmp2, label %header, label %latchexit
+
+headerexit: ; preds = %header
+ br label %exitsucc
+
+latchexit: ; preds = %latch
+ %shftphi = phi i64 [ %shft, %latch ]
+ ret i64 %shftphi
+
+exitsucc: ; preds = %headerexit
+ ret i64 96
+}
diff --git a/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index b5e914500fb4a..31c564779fb24 100644
--- a/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -86,10 +86,10 @@ for.end: ; preds = %for.end.loopexit, %
; AUTO_VEC-NEXT: entry:
; AUTO_VEC-NEXT: [[TMP0:%.*]] = icmp sgt i64 %n, 1
; AUTO_VEC-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 %n, i64 1
-; AUTO_VEC: br i1 {{.*}}, label %for.body, label %min.iters.checked
-; AUTO_VEC: min.iters.checked:
+; AUTO_VEC: br i1 {{.*}}, label %for.body, label %vector.ph
+; AUTO_VEC: vector.ph:
; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792
-; AUTO_VEC: br i1 {{.*}}, label %for.body, label %vector.body
+; AUTO_VEC: br label %vector.body
; AUTO_VEC: middle.block:
; AUTO_VEC: [[TMP11:%.*]] = add nsw i64 [[N_VEC]], -1
; AUTO_VEC-NEXT: [[CAST_CMO:%.*]] = sitofp i64 [[TMP11]] to double
diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll
index 49d88323523c1..f2d68fb4e62bd 100644
--- a/test/Transforms/LoopVectorize/debugloc.ll
+++ b/test/Transforms/LoopVectorize/debugloc.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; Make sure we are preserving debug info in the vectorized code.
; CHECK: for.body.lr.ph
-; CHECK: cmp.zero = icmp eq i64 {{.*}}, 0, !dbg !{{[0-9]+}}
+; CHECK: min.iters.check = icmp ult i64 {{.*}}, 2, !dbg !{{[0-9]+}}
; CHECK: vector.body
; CHECK: index {{.*}}, !dbg ![[LOC:[0-9]+]]
; CHECK: getelementptr inbounds i32, i32* %a, {{.*}}, !dbg ![[LOC]]
diff --git a/test/Transforms/LoopVectorize/first-order-recurrence.ll b/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 0ff94c1450acf..508938958d59a 100644
--- a/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -22,7 +22,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; CHECK: middle.block:
; CHECK: %vector.recur.extract = extractelement <4 x i32> [[L1]], i32 3
; CHECK: scalar.ph:
-; CHECK: %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %pre_load, %vector.memcheck ], [ %pre_load, %min.iters.checked ], [ %pre_load, %for.preheader ]
+; CHECK: %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %pre_load, %vector.memcheck ], [ %pre_load, %for.preheader ]
; CHECK: scalar.body:
; CHECK: %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
;
@@ -79,7 +79,7 @@ for.exit:
; CHECK: middle.block:
; CHECK: %vector.recur.extract = extractelement <4 x i32> [[L1]], i32 3
; CHECK: scalar.ph:
-; CHECK: %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %.pre, %min.iters.checked ], [ %.pre, %for.preheader ]
+; CHECK: %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %.pre, %for.preheader ]
; CHECK: scalar.body:
; CHECK: %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
;
@@ -144,7 +144,7 @@ scalar.body:
; CHECK: middle.block:
; CHECK: %vector.recur.extract = extractelement <4 x i16> [[L1]], i32 3
; CHECK: scalar.ph:
-; CHECK: %scalar.recur.init = phi i16 [ %vector.recur.extract, %middle.block ], [ %0, %vector.memcheck ], [ %0, %min.iters.checked ], [ %0, %for.preheader ]
+; CHECK: %scalar.recur.init = phi i16 [ %vector.recur.extract, %middle.block ], [ %0, %vector.memcheck ], [ %0, %for.preheader ]
; CHECK: scalar.body:
; CHECK: %scalar.recur = phi i16 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
;
@@ -288,7 +288,7 @@ for.cond.cleanup3:
; UNROLL-NO-IC-LABEL: @PR30183(
; UNROLL-NO-IC: vector.ph:
-; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> undef, i32 [[PRE_LOAD:%.*]], i32 3
+; UNROLL-NO-IC: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> undef, i32 [[PRE_LOAD:%.*]], i32 3
; UNROLL-NO-IC-NEXT: br label %vector.body
; UNROLL-NO-IC: vector.body:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
diff --git a/test/Transforms/LoopVectorize/float-induction.ll b/test/Transforms/LoopVectorize/float-induction.ll
index 8eec6e262c1a1..a7cc4530ceb39 100644
--- a/test/Transforms/LoopVectorize/float-induction.ll
+++ b/test/Transforms/LoopVectorize/float-induction.ll
@@ -15,7 +15,7 @@
; VEC4_INTERL1-LABEL: @fp_iv_loop1(
; VEC4_INTERL1: vector.ph:
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
+; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> undef, float %fpinc, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> undef, <4 x i32> zeroinitializer
@@ -37,7 +37,7 @@
; VEC4_INTERL2-LABEL: @fp_iv_loop1(
; VEC4_INTERL2: vector.ph:
-; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
+; VEC4_INTERL2: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <4 x float> undef, float %fpinc, i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT3]], <4 x float> undef, <4 x i32> zeroinitializer
@@ -63,7 +63,7 @@
; VEC1_INTERL2-LABEL: @fp_iv_loop1(
; VEC1_INTERL2: vector.ph:
-; VEC1_INTERL2-NEXT: br label %vector.body
+; VEC1_INTERL2: br label %vector.body
; VEC1_INTERL2: vector.body:
; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; VEC1_INTERL2-NEXT: [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
@@ -115,7 +115,7 @@ for.end: ; preds = %for.end.loopexit, %
; VEC4_INTERL1-LABEL: @fp_iv_loop2(
; VEC4_INTERL1: vector.ph:
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
+; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[INDUCTION2:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
; VEC4_INTERL1-NEXT: br label %vector.body
@@ -172,7 +172,7 @@ for.end: ; preds = %for.end.loopexit, %
; VEC4_INTERL1: for.body.lr.ph:
; VEC4_INTERL1: [[TMP0:%.*]] = load float, float* @fp_inc, align 4
; VEC4_INTERL1: vector.ph:
-; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
+; VEC4_INTERL1: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float %init, i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> undef, float [[TMP0]], i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> undef, <4 x i32> zeroinitializer
@@ -250,7 +250,7 @@ for.end:
; VEC4_INTERL1-LABEL: @fp_iv_loop4(
; VEC4_INTERL1: vector.ph:
-; VEC4_INTERL1-NEXT: br label %vector.body
+; VEC4_INTERL1: br label %vector.body
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
@@ -289,7 +289,7 @@ for.end: ; preds = %for.end.loopexit, %
; VEC2_INTERL1_PRED_STORE-LABEL: @non_primary_iv_float_scalar(
; VEC2_INTERL1_PRED_STORE: vector.body:
-; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ], [ 0, %min.iters.checked ]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = sitofp i64 [[INDEX]] to float
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
diff --git a/test/Transforms/LoopVectorize/if-conversion-nest.ll b/test/Transforms/LoopVectorize/if-conversion-nest.ll
index 7f381ae6ad7b5..0d6e4b1e61b44 100644
--- a/test/Transforms/LoopVectorize/if-conversion-nest.ll
+++ b/test/Transforms/LoopVectorize/if-conversion-nest.ll
@@ -13,24 +13,21 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[MIN_ITERS_CHECKED:%.*]]
-; CHECK: min.iters.checked:
-; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[N]], 3
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = zext i32 [[TMP3]] to i64
-; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT: [[CMP_ZERO:%.*]] = icmp eq i64 [[N_VEC]], 0
-; CHECK-NEXT: br i1 [[CMP_ZERO]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
-; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP6]]
-; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP5]]
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP5]]
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[A]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[N]], 3
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -55,10 +52,10 @@ define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP6]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[MIN_ITERS_CHECKED]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
diff --git a/test/Transforms/LoopVectorize/induction-step.ll b/test/Transforms/LoopVectorize/induction-step.ll
index 33e8ed067160d..b37537efcc513 100644
--- a/test/Transforms/LoopVectorize/induction-step.ll
+++ b/test/Transforms/LoopVectorize/induction-step.ll
@@ -15,7 +15,7 @@
; CHECK: for.body.lr.ph:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @int_inc, align 4
; CHECK: vector.ph:
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %init, i32 0
+; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %init, i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> undef, i32 [[TMP0]], i32 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> undef, <8 x i32> zeroinitializer
@@ -86,7 +86,7 @@ for.end: ; preds = %for.end.loopexit, %
; CHECK-LABEL: @induction_with_loop_inv(
; CHECK: vector.ph:
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %x.011, i32 0
+; CHECK: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 %x.011, i32 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> undef, i32 %j.012, i32 0
; CHECK-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> undef, <8 x i32> zeroinitializer
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index 7e9e6b1cdc8e3..d77806da59bed 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -501,13 +501,13 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
; condition and branch directly to the scalar loop.
; CHECK-LABEL: max_i32_backedgetaken
-; CHECK: br i1 true, label %scalar.ph, label %min.iters.checked
+; CHECK: br i1 true, label %scalar.ph, label %vector.ph
; CHECK: middle.block:
; CHECK: %[[v9:.+]] = extractelement <2 x i32> %bin.rdx, i32 0
; CHECK: scalar.ph:
; CHECK: %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %[[v0:.+]] ]
-; CHECK: %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ 1, %min.iters.checked ], [ %[[v9]], %middle.block ]
+; CHECK: %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ %[[v9]], %middle.block ]
define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
diff --git a/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
index 1e8b982363d80..89c0ac1091676 100644
--- a/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
+++ b/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
@@ -9,7 +9,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
;
; CHECK-LABEL: @interleaved_with_cond_store_0(
;
-; CHECK: min.iters.checked
+; CHECK: vector.ph
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
@@ -58,7 +58,7 @@ for.end:
;
; CHECK-LABEL: @interleaved_with_cond_store_1(
;
-; CHECK: min.iters.checked
+; CHECK: vector.ph
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
@@ -117,7 +117,7 @@ for.end:
;
; CHECK-LABEL: @interleaved_with_cond_store_2(
;
-; CHECK: min.iters.checked
+; CHECK: vector.ph
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
diff --git a/test/Transforms/LoopVectorize/interleaved-accesses.ll b/test/Transforms/LoopVectorize/interleaved-accesses.ll
index d84dc42bdf543..530c2f66552af 100644
--- a/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -338,7 +338,7 @@ for.body: ; preds = %for.body, %entry
; }
; CHECK-LABEL: @even_load_dynamic_tc(
-; CHECK: min.iters.checked:
+; CHECK: vector.ph:
; CHECK: %n.mod.vf = and i64 %[[N:[a-zA-Z0-9]+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -579,7 +579,7 @@ for.body: ; preds = %for.body, %entry
; }
; CHECK-LABEL: @PR27626_0(
-; CHECK: min.iters.checked:
+; CHECK: vector.ph:
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -627,7 +627,7 @@ for.end:
; }
; CHECK-LABEL: @PR27626_1(
-; CHECK: min.iters.checked:
+; CHECK: vector.ph:
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -680,7 +680,7 @@ for.end:
; }
; CHECK-LABEL: @PR27626_2(
-; CHECK: min.iters.checked:
+; CHECK: vector.ph:
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
@@ -728,7 +728,7 @@ for.end:
; }
; CHECK-LABEL: @PR27626_3(
-; CHECK: min.iters.checked:
+; CHECK: vector.ph:
; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
diff --git a/test/Transforms/LoopVectorize/iv_outside_user.ll b/test/Transforms/LoopVectorize/iv_outside_user.ll
index 8a44af96e7f4b..265188886996b 100644
--- a/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -135,7 +135,7 @@ for.end:
}
; CHECK-LABEL: @PR30742
-; CHECK: min.iters.checked
+; CHECK: vector.ph
; CHECK: %[[N_MOD_VF:.+]] = urem i32 %[[T5:.+]], 2
; CHECK: %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]]
; CHECK: middle.block
diff --git a/test/Transforms/LoopVectorize/miniters.ll b/test/Transforms/LoopVectorize/miniters.ll
index 1cb67f9150ac2..f5f4eb5eaa01c 100644
--- a/test/Transforms/LoopVectorize/miniters.ll
+++ b/test/Transforms/LoopVectorize/miniters.ll
@@ -10,10 +10,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; Generate min.iters.check to skip the vector loop and jump to scalar.ph directly when loop iteration number is less than VF * UF.
; CHECK-LABEL: foo(
; CHECK: %min.iters.check = icmp ult i64 %N, 4
-; CHECK: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
+; CHECK: br i1 %min.iters.check, label %scalar.ph, label %vector.ph
; UNROLL-LABEL: foo(
; UNROLL: %min.iters.check = icmp ult i64 %N, 8
-; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %min.iters.checked
+; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %vector.ph
define void @foo(i64 %N) {
entry:
diff --git a/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
new file mode 100644
index 0000000000000..40af8f3adf029
--- /dev/null
+++ b/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
@@ -0,0 +1,240 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check that the vectorizer identifies the %p.09 phi,
+; as an induction variable, despite the potential overflow
+; due to the truncation from 32bit to 8bit.
+; SCEV will detect the pattern "sext(trunc(%p.09)) + %step"
+; and generate the required runtime checks under which
+; we can assume no overflow. We check here that we generate
+; exactly two runtime checks:
+; 1) an overflow check:
+; {0,+,(trunc i32 %step to i8)}<%for.body> Added Flags: <nssw>
+; 2) an equality check verifying that the step of the induction
+; is equal to sext(trunc(step)):
+; Equal predicate: %step == (sext i8 (trunc i32 %step to i8) to i32)
+;
+; See also pr30654.
+;
+; int a[N];
+; void doit1(int n, int step) {
+; int i;
+; char p = 0;
+; for (i = 0; i < n; i++) {
+; a[i] = p;
+; p = p + step;
+; }
+; }
+;
+
+; CHECK-LABEL: @doit1
+; CHECK: vector.scevcheck
+; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: %[[TEST:[0-9]+]] = or i1 {{.*}}, %mul.overflow
+; CHECK: %[[NTEST:[0-9]+]] = or i1 false, %[[TEST]]
+; CHECK: %ident.check = icmp ne i32 {{.*}}, %{{.*}}
+; CHECK: %{{.*}} = or i1 %[[NTEST]], %ident.check
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: vector.body:
+; CHECK: <4 x i32>
+
+@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit1(i32 %n, i32 %step) local_unnamed_addr {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ %wide.trip.count = zext i32 %n to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %p.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+ %sext = shl i32 %p.09, 24
+ %conv = ashr exact i32 %sext, 24
+ %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %conv, i32* %arrayidx, align 4
+ %add = add nsw i32 %conv, %step
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; Same as above, but for checking the SCEV "zext(trunc(%p.09)) + %step".
+; Here we expect the following two predicates to be added for runtime checking:
+; 1) {0,+,(trunc i32 %step to i8)}<%for.body> Added Flags: <nusw>
+; 2) Equal predicate: %step == (zext i8 (trunc i32 %step to i8) to i32)
+;
+; int a[N];
+; void doit2(int n, int step) {
+; int i;
+; unsigned char p = 0;
+; for (i = 0; i < n; i++) {
+; a[i] = p;
+; p = p + step;
+; }
+; }
+;
+
+; CHECK-LABEL: @doit2
+; CHECK: vector.scevcheck
+; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: %[[TEST:[0-9]+]] = or i1 {{.*}}, %mul.overflow
+; CHECK: %[[NTEST:[0-9]+]] = or i1 false, %[[TEST]]
+; CHECK: %ident.check = icmp ne i32 {{.*}}, %{{.*}}
+; CHECK: %{{.*}} = or i1 %[[NTEST]], %ident.check
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: vector.body:
+; CHECK: <4 x i32>
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit2(i32 %n, i32 %step) local_unnamed_addr {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ %wide.trip.count = zext i32 %n to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %p.09 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+ %conv = and i32 %p.09, 255
+ %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %conv, i32* %arrayidx, align 4
+ %add = add nsw i32 %conv, %step
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; Here we check that the same phi scev analysis would fail
+; to create the runtime checks because the step is not invariant.
+; As a result vectorization will fail.
+;
+; int a[N];
+; void doit3(int n, int step) {
+; int i;
+; char p = 0;
+; for (i = 0; i < n; i++) {
+; a[i] = p;
+; p = p + step;
+; step += 2;
+; }
+; }
+;
+
+; CHECK-LABEL: @doit3
+; CHECK-NOT: vector.scevcheck
+; CHECK-NOT: vector.body:
+; CHECK-LABEL: for.body:
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit3(i32 %n, i32 %step) local_unnamed_addr {
+entry:
+ %cmp9 = icmp sgt i32 %n, 0
+ br i1 %cmp9, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ %wide.trip.count = zext i32 %n to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %p.012 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+ %step.addr.010 = phi i32 [ %add3, %for.body ], [ %step, %for.body.preheader ]
+ %sext = shl i32 %p.012, 24
+ %conv = ashr exact i32 %sext, 24
+ %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %conv, i32* %arrayidx, align 4
+ %add = add nsw i32 %conv, %step.addr.010
+ %add3 = add nsw i32 %step.addr.010, 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+
+; Lastly, we also check the case where we can tell at compile time that
+; the step of the induction is equal to sext(trunc(step)), in which case
+; we don't have to check this equality at runtime (we only need the
+; runtime overflow check). Therefore only the following overflow predicate
+; will be added for runtime checking:
+; {0,+,%cstep}<%for.body> Added Flags: <nssw>
+;
+; a[N];
+; void doit4(int n, char cstep) {
+; int i;
+; char p = 0;
+; int istep = cstep;
+; for (i = 0; i < n; i++) {
+; a[i] = p;
+; p = p + istep;
+; }
+; }
+
+; CHECK-LABEL: @doit4
+; CHECK: vector.scevcheck
+; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: %{{.*}} = or i1 {{.*}}, %mul.overflow
+; CHECK-NOT: %ident.check = icmp ne i32 {{.*}}, %{{.*}}
+; CHECK-NOT: %{{.*}} = or i1 %{{.*}}, %ident.check
+; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}})
+; CHECK: vector.body:
+; CHECK: <4 x i32>
+
+; Function Attrs: norecurse nounwind uwtable
+define void @doit4(i32 %n, i8 signext %cstep) local_unnamed_addr {
+entry:
+ %conv = sext i8 %cstep to i32
+ %cmp10 = icmp sgt i32 %n, 0
+ br i1 %cmp10, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ %wide.trip.count = zext i32 %n to i64
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %p.011 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+ %sext = shl i32 %p.011, 24
+ %conv2 = ashr exact i32 %sext, 24
+ %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %conv2, i32* %arrayidx, align 4
+ %add = add nsw i32 %conv2, %conv
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
index ac1145aab67b0..b37d94c0c328c 100644
--- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
@@ -4,7 +4,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
;CHECK-LABEL: @add_ints(
;CHECK: br
-;CHECK: br
;CHECK: getelementptr
;CHECK-DAG: getelementptr
;CHECK-DAG: icmp ugt
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index 958b3c135c976..fb05486127156 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -10,7 +10,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
;CHECK-LABEL: define i32 @foo
;CHECK: for.body.preheader:
-;CHECK: br i1 %cmp.zero, label %scalar.ph, label %vector.memcheck, !dbg [[BODY_LOC:![0-9]+]]
+;CHECK: br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck, !dbg [[BODY_LOC:![0-9]+]]
;CHECK: vector.memcheck:
;CHECK: br i1 %memcheck.conflict, label %scalar.ph, label %vector.ph, !dbg [[BODY_LOC]]
;CHECK: load <4 x float>