aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/X86
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r--test/CodeGen/X86/2006-05-11-InstrSched.ll2
-rw-r--r--test/CodeGen/X86/GlobalISel/irtranslator-call.ll30
-rw-r--r--test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll122
-rw-r--r--test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll25
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir18
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir18
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir18
-rw-r--r--test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir18
-rw-r--r--test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir20
-rw-r--r--test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir20
-rw-r--r--test/CodeGen/X86/GlobalISel/regbankselect-X32.mir10
-rw-r--r--test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir170
-rw-r--r--test/CodeGen/X86/GlobalISel/select-add-v128.mir72
-rw-r--r--test/CodeGen/X86/GlobalISel/select-add-v256.mir72
-rw-r--r--test/CodeGen/X86/GlobalISel/select-add-v512.mir24
-rw-r--r--test/CodeGen/X86/GlobalISel/select-add-x32.mir20
-rw-r--r--test/CodeGen/X86/GlobalISel/select-add.mir72
-rw-r--r--test/CodeGen/X86/GlobalISel/select-cmp.mir130
-rw-r--r--test/CodeGen/X86/GlobalISel/select-constant.mir12
-rw-r--r--test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir16
-rw-r--r--test/CodeGen/X86/GlobalISel/select-ext.mir22
-rw-r--r--test/CodeGen/X86/GlobalISel/select-gep.mir6
-rw-r--r--test/CodeGen/X86/GlobalISel/select-inc.mir8
-rw-r--r--test/CodeGen/X86/GlobalISel/select-leaf-constant.mir8
-rw-r--r--test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir56
-rw-r--r--test/CodeGen/X86/GlobalISel/select-memop-scalar.mir76
-rw-r--r--test/CodeGen/X86/GlobalISel/select-memop-v128.mir24
-rw-r--r--test/CodeGen/X86/GlobalISel/select-memop-v256.mir32
-rw-r--r--test/CodeGen/X86/GlobalISel/select-memop-v512.mir16
-rw-r--r--test/CodeGen/X86/GlobalISel/select-mul-scalar.mir18
-rw-r--r--test/CodeGen/X86/GlobalISel/select-mul-vec.mir90
-rw-r--r--test/CodeGen/X86/GlobalISel/select-sub-v128.mir72
-rw-r--r--test/CodeGen/X86/GlobalISel/select-sub-v256.mir72
-rw-r--r--test/CodeGen/X86/GlobalISel/select-sub-v512.mir24
-rw-r--r--test/CodeGen/X86/GlobalISel/select-sub.mir60
-rw-r--r--test/CodeGen/X86/GlobalISel/select-trunc.mir24
-rw-r--r--test/CodeGen/X86/O0-pipeline.ll2
-rw-r--r--test/CodeGen/X86/atom-fixup-lea3.ll11
-rw-r--r--test/CodeGen/X86/avx-schedule.ll32
-rw-r--r--test/CodeGen/X86/avx-splat.ll36
-rw-r--r--test/CodeGen/X86/avx512-cvt.ll4
-rw-r--r--test/CodeGen/X86/build-vector-128.ll92
-rw-r--r--test/CodeGen/X86/buildvec-insertvec.ll72
-rw-r--r--test/CodeGen/X86/clear_upper_vector_element_bits.ll195
-rw-r--r--test/CodeGen/X86/fast-isel-nontemporal.ll108
-rw-r--r--test/CodeGen/X86/full-lsr.ll10
-rw-r--r--test/CodeGen/X86/haddsub-2.ll348
-rw-r--r--test/CodeGen/X86/haddsub-undef.ll5
-rw-r--r--test/CodeGen/X86/hoist-spill.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce4.ll15
-rw-r--r--test/CodeGen/X86/madd.ll78
-rw-r--r--test/CodeGen/X86/masked-iv-safe.ll16
-rw-r--r--test/CodeGen/X86/memcmp.ll330
-rw-r--r--test/CodeGen/X86/merge-consecutive-loads-128.ll36
-rw-r--r--test/CodeGen/X86/mul-constant-i16.ll139
-rw-r--r--test/CodeGen/X86/mul-constant-i32.ll1578
-rw-r--r--test/CodeGen/X86/mul-constant-i64.ll1605
-rw-r--r--test/CodeGen/X86/mul-constant-result.ll1291
-rw-r--r--test/CodeGen/X86/nontemporal-loads.ll779
-rw-r--r--test/CodeGen/X86/pr32659.ll83
-rw-r--r--test/CodeGen/X86/select.ll14
-rw-r--r--test/CodeGen/X86/selectiondag-dominator.ll30
-rw-r--r--test/CodeGen/X86/sse-intrinsics-fast-isel.ll74
-rw-r--r--test/CodeGen/X86/sse1.ll80
-rw-r--r--test/CodeGen/X86/sse2-intrinsics-fast-isel.ll159
-rw-r--r--test/CodeGen/X86/sse3-avx-addsub-2.ll14
-rw-r--r--test/CodeGen/X86/sse42-intrinsics-fast-isel.ll6
-rw-r--r--test/CodeGen/X86/stack-folding-fp-avx1.ll21
-rw-r--r--test/CodeGen/X86/stack-folding-int-sse42.ll17
-rw-r--r--test/CodeGen/X86/trunc-to-bool.ll70
-rw-r--r--test/CodeGen/X86/vec_fp_to_int.ll18
-rw-r--r--test/CodeGen/X86/vec_int_to_fp.ll286
-rw-r--r--test/CodeGen/X86/vec_set.ll24
-rw-r--r--test/CodeGen/X86/vector-compare-results.ll538
-rw-r--r--test/CodeGen/X86/vector-rem.ll34
-rw-r--r--test/CodeGen/X86/vector-sext.ll784
-rw-r--r--test/CodeGen/X86/vector-shuffle-v48.ll49
-rw-r--r--test/CodeGen/X86/vector-shuffle-variable-128.ll274
-rw-r--r--test/CodeGen/X86/vector-sqrt.ll18
-rw-r--r--test/CodeGen/X86/vector-unsigned-cmp.ll134
-rw-r--r--test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll4
-rw-r--r--test/CodeGen/X86/vshift-1.ll9
-rw-r--r--test/CodeGen/X86/vshift-2.ll9
-rw-r--r--test/CodeGen/X86/x86-interleaved-access.ll93
84 files changed, 7887 insertions, 3236 deletions
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index b1deb2c5f567..e04d10c9d64a 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,6 +1,6 @@
; REQUIRES: asserts
; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats 2>&1 | \
-; RUN: grep "asm-printer" | grep 35
+; RUN: grep "asm-printer" | grep 33
target datalayout = "e-p:32:32"
define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll b/test/CodeGen/X86/GlobalISel/irtranslator-call.ll
deleted file mode 100644
index 6c60aed67a7b..000000000000
--- a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc -mtriple i386 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
-; RUN: llc -mtriple x86_64 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
-
-define void @test_void_return() {
-; CHECK-LABEL: name: test_void_return
-; CHECK: alignment: 4
-; CHECK-NEXT: exposesReturnsTwice: false
-; CHECK-NEXT: legalized: false
-; CHECK-NEXT: regBankSelected: false
-; CHECK-NEXT: selected: false
-; CHECK-NEXT: tracksRegLiveness: true
-; CHECK-NEXT: frameInfo:
-; CHECK-NEXT: isFrameAddressTaken: false
-; CHECK-NEXT: isReturnAddressTaken: false
-; CHECK-NEXT: hasStackMap: false
-; CHECK-NEXT: hasPatchPoint: false
-; CHECK-NEXT: stackSize: 0
-; CHECK-NEXT: offsetAdjustment: 0
-; CHECK-NEXT: maxAlignment: 0
-; CHECK-NEXT: adjustsStack: false
-; CHECK-NEXT: hasCalls: false
-; CHECK-NEXT: hasOpaqueSPAdjustment: false
-; CHECK-NEXT: hasVAStart: false
-; CHECK-NEXT: hasMustTailInVarArgFunc: false
-; CHECK-NEXT: body:
-; CHECK-NEXT: bb.1.entry:
-; CHECK-NEXT: RET 0
-entry:
- ret void
-}
diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
index 8ea3e4f9d739..00aa7cf84e55 100644
--- a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
+++ b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=i386-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
+; RUN: llc -mtriple=i386-linux-gnu -mattr=+sse2 -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
@a1_8bit = external global i8
@a7_8bit = external global i8
@@ -11,8 +11,8 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4,
; ALL-LABEL: name: test_i8_args_8
; X64: fixedStack:
-; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 1, alignment: 8, isImmutable: true, isAliased: false
-; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false
+; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 1, alignment: 8, isImmutable: true,
+; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true,
; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d
; X64: [[ARG1:%[0-9]+]](s8) = COPY %edi
; X64-NEXT: %{{[0-9]+}}(s8) = COPY %esi
@@ -26,14 +26,14 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4,
; X64-NEXT: [[ARG8:%[0-9]+]](s8) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0)
; X32: fixedStack:
-; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 1, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 1, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 1, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 1, alignment: 16, isImmutable: true, isAliased: false }
-; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 1, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 1, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 1, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 1, alignment: 4, isImmutable: true,
+; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 1, alignment: 8, isImmutable: true,
+; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 1, alignment: 4, isImmutable: true,
+; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 1, alignment: 16, isImmutable: true,
+; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 1, alignment: 4, isImmutable: true,
+; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 1, alignment: 8, isImmutable: true,
+; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 1, alignment: 4, isImmutable: true,
+; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true,
; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; X32-NEXT: [[ARG1:%[0-9]+]](s8) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0)
; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
@@ -77,8 +77,8 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
; ALL-LABEL: name: test_i32_args_8
; X64: fixedStack:
-; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false
-; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false
+; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true,
+; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d
; X64: [[ARG1:%[0-9]+]](s32) = COPY %edi
; X64-NEXT: %{{[0-9]+}}(s32) = COPY %esi
@@ -92,14 +92,14 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4,
; X64-NEXT: [[ARG8:%[0-9]+]](s32) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0)
; X32: fixedStack:
-; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false }
-; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 4, alignment: 8, isImmutable: true,
+; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 4, alignment: 16, isImmutable: true,
+; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true,
+; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; X32-NEXT: [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
@@ -142,8 +142,8 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4,
; ALL-LABEL: name: test_i64_args_8
; X64: fixedStack:
-; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false
-; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false
+; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 8, alignment: 8, isImmutable: true,
+; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, alignment: 16, isImmutable: true,
; X64: liveins: %rcx, %rdi, %rdx, %rsi, %r8, %r9
; X64: [[ARG1:%[0-9]+]](s64) = COPY %rdi
; X64-NEXT: %{{[0-9]+}}(s64) = COPY %rsi
@@ -157,22 +157,22 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4,
; X64-NEXT: [[ARG8:%[0-9]+]](s64) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0)
; X32: fixedStack:
-; X32: id: [[STACK60:[0-9]+]], offset: 60, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK56:[0-9]+]], offset: 56, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK52:[0-9]+]], offset: 52, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK48:[0-9]+]], offset: 48, size: 4, alignment: 16, isImmutable: true, isAliased: false }
-; X32: id: [[STACK44:[0-9]+]], offset: 44, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK40:[0-9]+]], offset: 40, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK36:[0-9]+]], offset: 36, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK32:[0-9]+]], offset: 32, size: 4, alignment: 16, isImmutable: true, isAliased: false }
-; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false }
-; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK60:[0-9]+]], type: default, offset: 60, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK56:[0-9]+]], type: default, offset: 56, size: 4, alignment: 8, isImmutable: true,
+; X32: id: [[STACK52:[0-9]+]], type: default, offset: 52, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK48:[0-9]+]], type: default, offset: 48, size: 4, alignment: 16, isImmutable: true,
+; X32: id: [[STACK44:[0-9]+]], type: default, offset: 44, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK40:[0-9]+]], type: default, offset: 40, size: 4, alignment: 8, isImmutable: true,
+; X32: id: [[STACK36:[0-9]+]], type: default, offset: 36, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK32:[0-9]+]], type: default, offset: 32, size: 4, alignment: 16, isImmutable: true,
+; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 4, alignment: 8, isImmutable: true,
+; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 4, alignment: 16, isImmutable: true,
+; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true,
+; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
; X32: [[ARG1L_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; X32-NEXT: [[ARG1L:%[0-9]+]](s32) = G_LOAD [[ARG1L_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
@@ -249,8 +249,8 @@ define float @test_float_args(float %arg1, float %arg2) {
; X64-NEXT: RET 0, implicit %xmm0
; X32: fixedStack:
-; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false }
-; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true,
+; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; X32-NEXT: [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
@@ -270,8 +270,8 @@ define double @test_double_args(double %arg1, double %arg2) {
; X64-NEXT: RET 0, implicit %xmm0
; X32: fixedStack:
-; X32: id: [[STACK4:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false }
-; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false }
+; X32: id: [[STACK4:[0-9]+]], type: default, offset: 8, size: 8, alignment: 8, isImmutable: true,
+; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, alignment: 16, isImmutable: true,
; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; X32-NEXT: [[ARG1:%[0-9]+]](s64) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0)
; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]]
@@ -282,6 +282,38 @@ define double @test_double_args(double %arg1, double %arg2) {
ret double %arg2
}
+define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) {
+; ALL: name: test_v4i32_args
+; ALL: liveins: %xmm0, %xmm1
+; ALL: [[ARG1:%[0-9]+]](<4 x s32>) = COPY %xmm0
+; ALL-NEXT: [[ARG2:%[0-9]+]](<4 x s32>) = COPY %xmm1
+; ALL-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](<4 x s32>)
+; ALL-NEXT: RET 0, implicit %xmm0
+ ret <4 x i32> %arg2
+}
+
+define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) {
+; ALL: name: test_v8i32_args
+; ALL: liveins: %xmm0, %xmm1
+; ALL: [[ARG1L:%[0-9]+]](<4 x s32>) = COPY %xmm0
+; ALL-NEXT: [[ARG1H:%[0-9]+]](<4 x s32>) = COPY %xmm1
+; ALL-NEXT: [[ARG1:%[0-9]+]](<8 x s32>) = G_MERGE_VALUES [[ARG1L]](<4 x s32>), [[ARG1H]](<4 x s32>)
+; ALL-NEXT: [[RETL:%[0-9]+]](<4 x s32>), [[RETH:%[0-9]+]](<4 x s32>) = G_UNMERGE_VALUES [[ARG1:%[0-9]+]](<8 x s32>)
+; ALL-NEXT: %xmm0 = COPY [[RETL:%[0-9]+]](<4 x s32>)
+; ALL-NEXT: %xmm1 = COPY [[RETH:%[0-9]+]](<4 x s32>)
+; ALL-NEXT: RET 0, implicit %xmm0, implicit %xmm1
+
+ ret <8 x i32> %arg1
+}
+
+define void @test_void_return() {
+; ALL-LABEL: name: test_void_return
+; ALL: bb.1.entry:
+; ALL-NEXT: RET 0
+entry:
+ ret void
+}
+
define i32 * @test_memop_i32(i32 * %p1) {
; ALL-LABEL:name: test_memop_i32
;X64 liveins: %rdi
@@ -290,7 +322,7 @@ define i32 * @test_memop_i32(i32 * %p1) {
;X64-NEXT: RET 0, implicit %rax
;X32: fixedStack:
-;X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false }
+;X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true,
;X32: %1(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
;X32-NEXT: %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0)
;X32-NEXT: %eax = COPY %0(p0)
diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll
deleted file mode 100644
index 90a05f5fc225..000000000000
--- a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64
-
-define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) {
-; X64: name: test_v4i32_args
-; X64: liveins: %xmm0, %xmm1
-; X64: [[ARG1:%[0-9]+]](<4 x s32>) = COPY %xmm0
-; X64-NEXT: [[ARG2:%[0-9]+]](<4 x s32>) = COPY %xmm1
-; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](<4 x s32>)
-; X64-NEXT: RET 0, implicit %xmm0
- ret <4 x i32> %arg2
-}
-
-define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) {
-; X64: name: test_v8i32_args
-; X64: liveins: %xmm0, %xmm1
-; X64: [[ARG1L:%[0-9]+]](<4 x s32>) = COPY %xmm0
-; X64-NEXT: [[ARG1H:%[0-9]+]](<4 x s32>) = COPY %xmm1
-; X64-NEXT: [[ARG1:%[0-9]+]](<8 x s32>) = G_MERGE_VALUES [[ARG1L]](<4 x s32>), [[ARG1H]](<4 x s32>)
-; X64-NEXT: [[RETL:%[0-9]+]](<4 x s32>), [[RETH:%[0-9]+]](<4 x s32>) = G_UNMERGE_VALUES [[ARG1:%[0-9]+]](<8 x s32>)
-; X64-NEXT: %xmm0 = COPY [[RETL:%[0-9]+]](<4 x s32>)
-; X64-NEXT: %xmm1 = COPY [[RETH:%[0-9]+]](<4 x s32>)
-; X64-NEXT: RET 0, implicit %xmm0, implicit %xmm1
-
- ret <8 x i32> %arg1
-}
diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
index 0d66a6384107..682d01e66fa0 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir
@@ -24,9 +24,9 @@ alignment: 4
legalized: false
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: _ }
-# CHECK-NEXT: - { id: 1, class: _ }
-# CHECK-NEXT: - { id: 2, class: _ }
+# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -56,9 +56,9 @@ alignment: 4
legalized: false
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: _ }
-# CHECK-NEXT: - { id: 1, class: _ }
-# CHECK-NEXT: - { id: 2, class: _ }
+# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -88,9 +88,9 @@ alignment: 4
legalized: false
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: _ }
-# CHECK-NEXT: - { id: 1, class: _ }
-# CHECK-NEXT: - { id: 2, class: _ }
+# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir
index be62832b008a..effd26e9866d 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir
@@ -26,9 +26,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -56,9 +56,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -86,9 +86,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir
index d99303c3ba3b..5ae8132156d5 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir
@@ -26,9 +26,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -56,9 +56,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -86,9 +86,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir
index 24eefd30c2ac..71ea313c4c72 100644
--- a/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir
+++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir
@@ -28,9 +28,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -58,9 +58,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -88,9 +88,9 @@ alignment: 4
legalized: false
regBankSelected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: _ }
-# ALL-NEXT: - { id: 1, class: _ }
-# ALL-NEXT: - { id: 2, class: _ }
+# ALL-NEXT: - { id: 0, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: _, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: _, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir b/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
index cc03f3a57f0b..ca238b29c2dd 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir
@@ -33,8 +33,8 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_mul_vec256
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -56,8 +56,8 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_vec256
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -79,8 +79,8 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_sub_vec256
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -100,8 +100,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: vecr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -122,8 +122,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vecr }
-# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir b/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
index 278413ad38ef..c94ecc8e9a8d 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir
@@ -33,8 +33,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vecr }
-# CHECK-NEXT: - { id: 1, class: vecr }
+# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -53,8 +53,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vecr }
-# CHECK-NEXT: - { id: 1, class: vecr }
+# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -73,8 +73,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vecr }
-# CHECK-NEXT: - { id: 1, class: vecr }
+# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -93,8 +93,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: vecr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -115,8 +115,8 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vecr }
-# CHECK-NEXT: - { id: 1, class: gpr }
+# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir
index a115d1fa3255..b74e03f0fe79 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir
@@ -14,11 +14,11 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
-# CHECK-NEXT: - { id: 4, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
index 1ea922ee475a..7bcc57aef4ac 100644
--- a/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
+++ b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir
@@ -145,9 +145,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_i8
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
-# CHECK: - { id: 2, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -172,9 +172,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_i16
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
-# CHECK: - { id: 2, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -199,9 +199,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_i32
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
-# CHECK: - { id: 2, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -226,9 +226,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_i64
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
-# CHECK: - { id: 2, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -253,14 +253,14 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_mul_gpr
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
-# CHECK: - { id: 2, class: gpr }
-# CHECK: - { id: 3, class: gpr }
-# CHECK: - { id: 4, class: gpr }
-# CHECK: - { id: 5, class: gpr }
-# CHECK: - { id: 6, class: gpr }
-# CHECK: - { id: 7, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK: - { id: 3, class: gpr, preferred-register: '' }
+# CHECK: - { id: 4, class: gpr, preferred-register: '' }
+# CHECK: - { id: 5, class: gpr, preferred-register: '' }
+# CHECK: - { id: 6, class: gpr, preferred-register: '' }
+# CHECK: - { id: 7, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -292,9 +292,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_float
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
-# CHECK: - { id: 2, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
+# CHECK: - { id: 2, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -319,9 +319,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_double
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
-# CHECK: - { id: 2, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
+# CHECK: - { id: 2, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -346,9 +346,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_v4i32
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
-# CHECK: - { id: 2, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
+# CHECK: - { id: 2, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -373,9 +373,9 @@ selected: false
tracksRegLiveness: true
# CHECK-LABEL: name: test_add_v4f32
# CHECK: registers:
-# CHECK: - { id: 0, class: vecr }
-# CHECK: - { id: 1, class: vecr }
-# CHECK: - { id: 2, class: vecr }
+# CHECK: - { id: 0, class: vecr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
+# CHECK: - { id: 2, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -399,8 +399,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_i8
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -422,8 +422,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_i16
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -445,8 +445,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_i32
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -469,8 +469,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_i64
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -492,8 +492,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_float
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -515,8 +515,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_double
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -538,8 +538,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_load_v4i32
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: vecr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: vecr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -561,8 +561,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_store_i32
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -585,8 +585,8 @@ regBankSelected: false
selected: false
# CHECK-LABEL: name: test_store_i64
# CHECK: registers:
-# CHECK: - { id: 0, class: gpr }
-# CHECK: - { id: 1, class: gpr }
+# CHECK: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -610,12 +610,12 @@ selected: false
# CHECK-LABEL: name: test_store_float
# CHECK: registers:
-# FAST-NEXT: - { id: 0, class: vecr }
-# FAST-NEXT: - { id: 1, class: gpr }
-# FAST-NEXT: - { id: 2, class: gpr }
+# FAST-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# FAST-NEXT: - { id: 2, class: gpr, preferred-register: '' }
-# GREEDY-NEXT: - { id: 0, class: vecr }
-# GREEDY-NEXT: - { id: 1, class: gpr }
+# GREEDY-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -647,12 +647,12 @@ selected: false
# CHECK-LABEL: name: test_store_double
# CHECK: registers:
-# FAST-NEXT: - { id: 0, class: vecr }
-# FAST-NEXT: - { id: 1, class: gpr }
-# FAST-NEXT: - { id: 2, class: gpr }
+# FAST-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# FAST-NEXT: - { id: 2, class: gpr, preferred-register: '' }
-# GREEDY-NEXT: - { id: 0, class: vecr }
-# GREEDY-NEXT: - { id: 1, class: gpr }
+# GREEDY-NEXT: - { id: 0, class: vecr, preferred-register: '' }
+# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
@@ -682,10 +682,10 @@ alignment: 4
legalized: true
# CHECK-LABEL: name: constInt_check
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -706,10 +706,10 @@ alignment: 4
legalized: true
# CHECK-LABEL: name: trunc_check
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -729,11 +729,11 @@ name: test_gep
legalized: true
# CHECK-LABEL: name: test_gep
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
-# CHECK-NEXT: - { id: 3, class: gpr }
-# CHECK-NEXT: - { id: 4, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -757,9 +757,9 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -782,9 +782,9 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -807,9 +807,9 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
@@ -832,9 +832,9 @@ alignment: 4
legalized: true
regBankSelected: false
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr }
-# CHECK-NEXT: - { id: 1, class: gpr }
-# CHECK-NEXT: - { id: 2, class: gpr }
+# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: _ }
- { id: 1, class: _ }
diff --git a/test/CodeGen/X86/GlobalISel/select-add-v128.mir b/test/CodeGen/X86/GlobalISel/select-add-v128.mir
index a39702340bc2..4f7b6ec72d52 100644
--- a/test/CodeGen/X86/GlobalISel/select-add-v128.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add-v128.mir
@@ -32,19 +32,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128 }
-# AVX512VL-NEXT: - { id: 1, class: vr128 }
-# AVX512VL-NEXT: - { id: 2, class: vr128 }
+# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -74,19 +74,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128 }
-# AVX512VL-NEXT: - { id: 1, class: vr128 }
-# AVX512VL-NEXT: - { id: 2, class: vr128 }
+# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -116,19 +116,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -158,19 +158,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-add-v256.mir b/test/CodeGen/X86/GlobalISel/select-add-v256.mir
index 7556c2104124..143fd9422974 100644
--- a/test/CodeGen/X86/GlobalISel/select-add-v256.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add-v256.mir
@@ -30,19 +30,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256 }
-# AVX512VL-NEXT: - { id: 1, class: vr256 }
-# AVX512VL-NEXT: - { id: 2, class: vr256 }
+# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -70,19 +70,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256 }
-# AVX512VL-NEXT: - { id: 1, class: vr256 }
-# AVX512VL-NEXT: - { id: 2, class: vr256 }
+# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -110,19 +110,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256x }
-# AVX512VL-NEXT: - { id: 1, class: vr256x }
-# AVX512VL-NEXT: - { id: 2, class: vr256x }
+# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -150,19 +150,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256x }
-# AVX512VL-NEXT: - { id: 1, class: vr256x }
-# AVX512VL-NEXT: - { id: 2, class: vr256x }
+# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-add-v512.mir b/test/CodeGen/X86/GlobalISel/select-add-v512.mir
index e90be4e996f8..6a0cd32eefd5 100644
--- a/test/CodeGen/X86/GlobalISel/select-add-v512.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add-v512.mir
@@ -31,9 +31,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -57,9 +57,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -83,9 +83,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -109,9 +109,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-add-x32.mir b/test/CodeGen/X86/GlobalISel/select-add-x32.mir
index 8710aaa61a21..0b864f417367 100644
--- a/test/CodeGen/X86/GlobalISel/select-add-x32.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add-x32.mir
@@ -13,16 +13,16 @@ alignment: 4
legalized: true
regBankSelected: true
# X32: registers:
-# X32-NEXT: - { id: 0, class: gr32 }
-# X32-NEXT: - { id: 1, class: gr32 }
-# X32-NEXT: - { id: 2, class: gr32 }
-# X32-NEXT: - { id: 3, class: gr32 }
-# X32-NEXT: - { id: 4, class: gpr }
-# X32-NEXT: - { id: 5, class: gr32 }
-# X32-NEXT: - { id: 6, class: gr32 }
-# X32-NEXT: - { id: 7, class: gr32 }
-# X32-NEXT: - { id: 8, class: gr32 }
-# X32-NEXT: - { id: 9, class: gpr }
+# X32-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 4, class: gpr, preferred-register: '' }
+# X32-NEXT: - { id: 5, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 6, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 7, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 8, class: gr32, preferred-register: '' }
+# X32-NEXT: - { id: 9, class: gpr, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-add.mir b/test/CodeGen/X86/GlobalISel/select-add.mir
index 7337ce12c395..78e6bb6913a4 100644
--- a/test/CodeGen/X86/GlobalISel/select-add.mir
+++ b/test/CodeGen/X86/GlobalISel/select-add.mir
@@ -51,9 +51,9 @@ name: test_add_i64
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr64 }
-# ALL-NEXT: - { id: 1, class: gr64 }
-# ALL-NEXT: - { id: 2, class: gr64 }
+# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -78,9 +78,9 @@ name: test_add_i32
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -106,9 +106,9 @@ legalized: true
regBankSelected: true
selected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr16 }
-# ALL-NEXT: - { id: 1, class: gr16 }
-# ALL-NEXT: - { id: 2, class: gr16 }
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -135,9 +135,9 @@ legalized: true
regBankSelected: true
selected: false
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr8 }
-# ALL-NEXT: - { id: 2, class: gr8 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -165,12 +165,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr32 }
-# NO_AVX512F-NEXT: - { id: 1, class: fr32 }
-# NO_AVX512F-NEXT: - { id: 2, class: fr32 }
-# AVX512ALL-NEXT: - { id: 0, class: fr32x }
-# AVX512ALL-NEXT: - { id: 1, class: fr32x }
-# AVX512ALL-NEXT: - { id: 2, class: fr32x }
+# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -200,12 +200,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr64 }
-# NO_AVX512F-NEXT: - { id: 1, class: fr64 }
-# NO_AVX512F-NEXT: - { id: 2, class: fr64 }
-# AVX512ALL-NEXT: - { id: 0, class: fr64x }
-# AVX512ALL-NEXT: - { id: 1, class: fr64x }
-# AVX512ALL-NEXT: - { id: 2, class: fr64x }
+# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -235,12 +235,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -271,12 +271,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-cmp.mir b/test/CodeGen/X86/GlobalISel/select-cmp.mir
index a92c388c1db9..64c8cb6b823a 100644
--- a/test/CodeGen/X86/GlobalISel/select-cmp.mir
+++ b/test/CodeGen/X86/GlobalISel/select-cmp.mir
@@ -87,11 +87,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr8 }
-# CHECK-NEXT: - { id: 1, class: gr8 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -124,11 +124,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr16 }
-# CHECK-NEXT: - { id: 1, class: gr16 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr16, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -161,11 +161,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
-# CHECK-NEXT: - { id: 1, class: gr64 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -198,11 +198,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -235,11 +235,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -272,11 +272,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -309,11 +309,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -346,11 +346,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -383,11 +383,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -420,11 +420,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -457,11 +457,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -494,11 +494,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -531,11 +531,11 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
-# CHECK-NEXT: - { id: 2, class: gr8 }
-# CHECK-NEXT: - { id: 3, class: gr32 }
-# CHECK-NEXT: - { id: 4, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' }
+# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-constant.mir b/test/CodeGen/X86/GlobalISel/select-constant.mir
index 162de0264435..7902a5084ce6 100644
--- a/test/CodeGen/X86/GlobalISel/select-constant.mir
+++ b/test/CodeGen/X86/GlobalISel/select-constant.mir
@@ -33,7 +33,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i8
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr8 }
+# CHECK-NEXT: - { id: 0, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -52,7 +52,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i16
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr16 }
+# CHECK-NEXT: - { id: 0, class: gr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -71,7 +71,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i32
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -90,7 +90,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i64
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -110,7 +110,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i64_u32
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -129,7 +129,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i64_i32
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
diff --git a/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir b/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir
index d1a3abfd0f93..edb467b2bf90 100644
--- a/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir
+++ b/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir
@@ -25,10 +25,10 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr8 }
-# ALL-NEXT: - { id: 2, class: gr64 }
-# ALL-NEXT: - { id: 3, class: gr64 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 3, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -57,8 +57,8 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr64 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -83,8 +83,8 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr16 }
-# ALL-NEXT: - { id: 1, class: gr64 }
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-ext.mir b/test/CodeGen/X86/GlobalISel/select-ext.mir
index dccc20e57100..b52f1f6fa621 100644
--- a/test/CodeGen/X86/GlobalISel/select-ext.mir
+++ b/test/CodeGen/X86/GlobalISel/select-ext.mir
@@ -35,9 +35,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -63,8 +63,8 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -89,8 +89,8 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr16 }
-# ALL-NEXT: - { id: 1, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -115,8 +115,8 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -141,8 +141,8 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr16 }
-# ALL-NEXT: - { id: 1, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-gep.mir b/test/CodeGen/X86/GlobalISel/select-gep.mir
index c8a4dc80cb2c..61c766230035 100644
--- a/test/CodeGen/X86/GlobalISel/select-gep.mir
+++ b/test/CodeGen/X86/GlobalISel/select-gep.mir
@@ -14,9 +14,9 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: test_gep_i32
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
-# CHECK-NEXT: - { id: 1, class: gr64_nosp }
-# CHECK-NEXT: - { id: 2, class: gr64 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr64_nosp, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-inc.mir b/test/CodeGen/X86/GlobalISel/select-inc.mir
index 7a77864091d3..47fe6ef672ba 100644
--- a/test/CodeGen/X86/GlobalISel/select-inc.mir
+++ b/test/CodeGen/X86/GlobalISel/select-inc.mir
@@ -13,10 +13,10 @@ name: test_add_i8
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# INC-NEXT: - { id: 1, class: gpr }
-# ADD-NEXT: - { id: 1, class: gr8 }
-# ALL-NEXT: - { id: 2, class: gr8 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# INC-NEXT: - { id: 1, class: gpr, preferred-register: '' }
+# ADD-NEXT: - { id: 1, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir b/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir
index 539520c0b8f5..9128f19b1d24 100644
--- a/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir
+++ b/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir
@@ -29,7 +29,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i32_1
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -47,7 +47,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i32_1_optsize
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -65,7 +65,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i32_1b
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
@@ -83,7 +83,7 @@ regBankSelected: true
selected: false
# CHECK-LABEL: name: const_i32_1_optsizeb
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
# CHECK: body:
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir b/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir
index 8e6a2771db6e..09f414b48a8a 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir
@@ -49,9 +49,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr8 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -79,9 +79,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr16 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -109,9 +109,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -139,10 +139,10 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr8 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
-# ALL-NEXT: - { id: 3, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -176,10 +176,10 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr16 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
-# ALL-NEXT: - { id: 3, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -213,10 +213,10 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
-# ALL-NEXT: - { id: 3, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -250,9 +250,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -280,10 +280,10 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
-# ALL-NEXT: - { id: 3, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir b/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir
index b57c9b0cca98..6d03d7525d20 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir
@@ -91,8 +91,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr8 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr8, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -115,8 +115,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr16 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr16, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -139,8 +139,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr32 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr32, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -163,8 +163,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -187,8 +187,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr32 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr32, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -211,9 +211,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# NO_AVX512F: - { id: 1, class: fr32 }
-# AVX512ALL: - { id: 1, class: fr32x }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F: - { id: 1, class: fr32, preferred-register: '' }
+# AVX512ALL: - { id: 1, class: fr32x, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: vecr }
# ALL: %0 = COPY %rdi
@@ -238,8 +238,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -262,9 +262,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# NO_AVX512F: - { id: 1, class: fr64 }
-# AVX512ALL: - { id: 1, class: fr64x }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F: - { id: 1, class: fr64, preferred-register: '' }
+# AVX512ALL: - { id: 1, class: fr64x, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: vecr }
# ALL: %0 = COPY %rdi
@@ -289,8 +289,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr32 }
-# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 0, class: gr32, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %edi
@@ -315,8 +315,8 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %rdi
@@ -341,9 +341,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: fr32x }
-# ALL: - { id: 1, class: gr64 }
-# ALL: - { id: 2, class: gr32 }
+# ALL: - { id: 0, class: fr32x, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
+# ALL: - { id: 2, class: gr32, preferred-register: '' }
- { id: 0, class: vecr }
- { id: 1, class: gpr }
- { id: 2, class: gpr }
@@ -371,9 +371,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# NO_AVX512F: - { id: 0, class: fr32 }
-# AVX512ALL: - { id: 0, class: fr32x }
-# ALL: - { id: 1, class: gr64 }
+# NO_AVX512F: - { id: 0, class: fr32, preferred-register: '' }
+# AVX512ALL: - { id: 0, class: fr32x, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: vecr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %xmm0
@@ -400,9 +400,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: fr64x }
-# ALL: - { id: 1, class: gr64 }
-# ALL: - { id: 2, class: gr64 }
+# ALL: - { id: 0, class: fr64x, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
+# ALL: - { id: 2, class: gr64, preferred-register: '' }
- { id: 0, class: vecr }
- { id: 1, class: gpr }
- { id: 2, class: gpr }
@@ -430,9 +430,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# NO_AVX512F: - { id: 0, class: fr64 }
-# AVX512ALL: - { id: 0, class: fr64x }
-# ALL: - { id: 1, class: gr64 }
+# NO_AVX512F: - { id: 0, class: fr64, preferred-register: '' }
+# AVX512ALL: - { id: 0, class: fr64x, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: vecr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %xmm0
@@ -460,8 +460,8 @@ legalized: true
regBankSelected: true
selected: false
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: %1 = MOV64rm %0, 1, _, 0, _ :: (load 8 from %ir.ptr1)
@@ -483,8 +483,8 @@ legalized: true
regBankSelected: true
selected: false
registers:
-# ALL: - { id: 0, class: gr64 }
-# ALL: - { id: 1, class: gr64 }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: gpr }
# ALL: MOV64mr %0, 1, _, 0, _, %1 :: (store 8 into %ir.ptr1)
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v128.mir b/test/CodeGen/X86/GlobalISel/select-memop-v128.mir
index ce3f6b91dcf6..08844657e2a2 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-v128.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-v128.mir
@@ -32,9 +32,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# NO_AVX512F: - { id: 1, class: vr128 }
-# AVX512ALL: - { id: 1, class: vr128x }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F: - { id: 1, class: vr128, preferred-register: '' }
+# AVX512ALL: - { id: 1, class: vr128x, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: vecr }
# ALL: %0 = COPY %rdi
@@ -60,9 +60,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# ALL: - { id: 0, class: gr64 }
-# NO_AVX512F: - { id: 1, class: vr128 }
-# AVX512ALL: - { id: 1, class: vr128x }
+# ALL: - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F: - { id: 1, class: vr128, preferred-register: '' }
+# AVX512ALL: - { id: 1, class: vr128x, preferred-register: '' }
- { id: 0, class: gpr }
- { id: 1, class: vecr }
# ALL: %0 = COPY %rdi
@@ -88,9 +88,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# NO_AVX512F: - { id: 0, class: vr128 }
-# AVX512ALL: - { id: 0, class: vr128x }
-# ALL: - { id: 1, class: gr64 }
+# NO_AVX512F: - { id: 0, class: vr128, preferred-register: '' }
+# AVX512ALL: - { id: 0, class: vr128x, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: vecr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %xmm0
@@ -118,9 +118,9 @@ alignment: 4
legalized: true
regBankSelected: true
registers:
-# NO_AVX512F: - { id: 0, class: vr128 }
-# AVX512ALL: - { id: 0, class: vr128x }
-# ALL: - { id: 1, class: gr64 }
+# NO_AVX512F: - { id: 0, class: vr128, preferred-register: '' }
+# AVX512ALL: - { id: 0, class: vr128x, preferred-register: '' }
+# ALL: - { id: 1, class: gr64, preferred-register: '' }
- { id: 0, class: vecr }
- { id: 1, class: gpr }
# ALL: %0 = COPY %xmm0
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v256.mir b/test/CodeGen/X86/GlobalISel/select-memop-v256.mir
index b9a7e4a8cc4a..ff371ad9989f 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-v256.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-v256.mir
@@ -33,12 +33,12 @@ alignment: 4
legalized: true
regBankSelected: true
# NO_AVX512F: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: gr64 }
-# NO_AVX512F-NEXT: - { id: 1, class: vr256 }
+# NO_AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: vr256, preferred-register: '' }
#
# AVX512ALL: registers:
-# AVX512ALL-NEXT: - { id: 0, class: gr64 }
-# AVX512ALL-NEXT: - { id: 1, class: vr256x }
+# AVX512ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: vecr }
@@ -73,12 +73,12 @@ alignment: 4
legalized: true
regBankSelected: true
# NO_AVX512F: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: gr64 }
-# NO_AVX512F-NEXT: - { id: 1, class: vr256 }
+# NO_AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: vr256, preferred-register: '' }
#
# AVX512ALL: registers:
-# AVX512ALL-NEXT: - { id: 0, class: gr64 }
-# AVX512ALL-NEXT: - { id: 1, class: vr256x }
+# AVX512ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: vecr }
@@ -113,12 +113,12 @@ alignment: 4
legalized: true
regBankSelected: true
# NO_AVX512F: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: vr256 }
-# NO_AVX512F-NEXT: - { id: 1, class: gr64 }
+# NO_AVX512F-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' }
#
# AVX512ALL: registers:
-# AVX512ALL-NEXT: - { id: 0, class: vr256x }
-# AVX512ALL-NEXT: - { id: 1, class: gr64 }
+# AVX512ALL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: gpr }
@@ -153,12 +153,12 @@ alignment: 4
legalized: true
regBankSelected: true
# NO_AVX512F: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: vr256 }
-# NO_AVX512F-NEXT: - { id: 1, class: gr64 }
+# NO_AVX512F-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' }
#
# AVX512ALL: registers:
-# AVX512ALL-NEXT: - { id: 0, class: vr256x }
-# AVX512ALL-NEXT: - { id: 1, class: gr64 }
+# AVX512ALL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v512.mir b/test/CodeGen/X86/GlobalISel/select-memop-v512.mir
index 87978a684d4c..131902d81a00 100644
--- a/test/CodeGen/X86/GlobalISel/select-memop-v512.mir
+++ b/test/CodeGen/X86/GlobalISel/select-memop-v512.mir
@@ -28,8 +28,8 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX512F: registers:
-# AVX512F-NEXT: - { id: 0, class: gr64 }
-# AVX512F-NEXT: - { id: 1, class: vr512 }
+# AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# AVX512F-NEXT: - { id: 1, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: vecr }
@@ -54,8 +54,8 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX512F: registers:
-# AVX512F-NEXT: - { id: 0, class: gr64 }
-# AVX512F-NEXT: - { id: 1, class: vr512 }
+# AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# AVX512F-NEXT: - { id: 1, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: vecr }
@@ -80,8 +80,8 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX512F: registers:
-# AVX512F-NEXT: - { id: 0, class: vr512 }
-# AVX512F-NEXT: - { id: 1, class: gr64 }
+# AVX512F-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: gpr }
@@ -106,8 +106,8 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX512F: registers:
-# AVX512F-NEXT: - { id: 0, class: vr512 }
-# AVX512F-NEXT: - { id: 1, class: gr64 }
+# AVX512F-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir b/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir
index 34a77acc2d1e..453557c08469 100644
--- a/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir
+++ b/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir
@@ -24,9 +24,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr16 }
-# ALL-NEXT: - { id: 1, class: gr16 }
-# ALL-NEXT: - { id: 2, class: gr16 }
+# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr16, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -55,9 +55,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -86,9 +86,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr64 }
-# ALL-NEXT: - { id: 1, class: gr64 }
-# ALL-NEXT: - { id: 2, class: gr64 }
+# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/GlobalISel/select-mul-vec.mir b/test/CodeGen/X86/GlobalISel/select-mul-vec.mir
index 5f8ab1e4f189..d3651ccd1ab9 100644
--- a/test/CodeGen/X86/GlobalISel/select-mul-vec.mir
+++ b/test/CodeGen/X86/GlobalISel/select-mul-vec.mir
@@ -95,9 +95,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128 }
-# CHECK-NEXT: - { id: 1, class: vr128 }
-# CHECK-NEXT: - { id: 2, class: vr128 }
+# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -121,9 +121,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128 }
-# CHECK-NEXT: - { id: 1, class: vr128 }
-# CHECK-NEXT: - { id: 2, class: vr128 }
+# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -147,9 +147,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128x }
-# CHECK-NEXT: - { id: 1, class: vr128x }
-# CHECK-NEXT: - { id: 2, class: vr128x }
+# CHECK-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -173,9 +173,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128 }
-# CHECK-NEXT: - { id: 1, class: vr128 }
-# CHECK-NEXT: - { id: 2, class: vr128 }
+# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -199,9 +199,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128 }
-# CHECK-NEXT: - { id: 1, class: vr128 }
-# CHECK-NEXT: - { id: 2, class: vr128 }
+# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -225,9 +225,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128x }
-# CHECK-NEXT: - { id: 1, class: vr128x }
-# CHECK-NEXT: - { id: 2, class: vr128x }
+# CHECK-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -251,9 +251,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr128x }
-# CHECK-NEXT: - { id: 1, class: vr128x }
-# CHECK-NEXT: - { id: 2, class: vr128x }
+# CHECK-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -277,9 +277,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr256 }
-# CHECK-NEXT: - { id: 1, class: vr256 }
-# CHECK-NEXT: - { id: 2, class: vr256 }
+# CHECK-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr256, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -303,9 +303,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr256x }
-# CHECK-NEXT: - { id: 1, class: vr256x }
-# CHECK-NEXT: - { id: 2, class: vr256x }
+# CHECK-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -329,9 +329,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr256 }
-# CHECK-NEXT: - { id: 1, class: vr256 }
-# CHECK-NEXT: - { id: 2, class: vr256 }
+# CHECK-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr256, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -355,9 +355,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr256x }
-# CHECK-NEXT: - { id: 1, class: vr256x }
-# CHECK-NEXT: - { id: 2, class: vr256x }
+# CHECK-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -381,9 +381,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr256x }
-# CHECK-NEXT: - { id: 1, class: vr256x }
-# CHECK-NEXT: - { id: 2, class: vr256x }
+# CHECK-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -407,9 +407,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr512 }
-# CHECK-NEXT: - { id: 1, class: vr512 }
-# CHECK-NEXT: - { id: 2, class: vr512 }
+# CHECK-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -433,9 +433,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr512 }
-# CHECK-NEXT: - { id: 1, class: vr512 }
-# CHECK-NEXT: - { id: 2, class: vr512 }
+# CHECK-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -459,9 +459,9 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: vr512 }
-# CHECK-NEXT: - { id: 1, class: vr512 }
-# CHECK-NEXT: - { id: 2, class: vr512 }
+# CHECK-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# CHECK-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v128.mir b/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
index d60d4155e29d..f77879d93009 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub-v128.mir
@@ -32,19 +32,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128 }
-# AVX512VL-NEXT: - { id: 1, class: vr128 }
-# AVX512VL-NEXT: - { id: 2, class: vr128 }
+# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -74,19 +74,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128 }
-# AVX512VL-NEXT: - { id: 1, class: vr128 }
-# AVX512VL-NEXT: - { id: 2, class: vr128 }
+# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -116,19 +116,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -158,19 +158,19 @@ alignment: 4
legalized: true
regBankSelected: true
# NOVL: registers:
-# NOVL-NEXT: - { id: 0, class: vr128 }
-# NOVL-NEXT: - { id: 1, class: vr128 }
-# NOVL-NEXT: - { id: 2, class: vr128 }
+# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr128x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr128x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v256.mir b/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
index fbc44997b4a2..d6bde7fbb691 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub-v256.mir
@@ -30,19 +30,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256 }
-# AVX512VL-NEXT: - { id: 1, class: vr256 }
-# AVX512VL-NEXT: - { id: 2, class: vr256 }
+# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -70,19 +70,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256 }
-# AVX512VL-NEXT: - { id: 1, class: vr256 }
-# AVX512VL-NEXT: - { id: 2, class: vr256 }
+# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -110,19 +110,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256x }
-# AVX512VL-NEXT: - { id: 1, class: vr256x }
-# AVX512VL-NEXT: - { id: 2, class: vr256x }
+# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -150,19 +150,19 @@ alignment: 4
legalized: true
regBankSelected: true
# AVX2: registers:
-# AVX2-NEXT: - { id: 0, class: vr256 }
-# AVX2-NEXT: - { id: 1, class: vr256 }
-# AVX2-NEXT: - { id: 2, class: vr256 }
+# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' }
+# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' }
#
# AVX512VL: registers:
-# AVX512VL-NEXT: - { id: 0, class: vr256x }
-# AVX512VL-NEXT: - { id: 1, class: vr256x }
-# AVX512VL-NEXT: - { id: 2, class: vr256x }
+# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
#
# AVX512BWVL: registers:
-# AVX512BWVL-NEXT: - { id: 0, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 1, class: vr256x }
-# AVX512BWVL-NEXT: - { id: 2, class: vr256x }
+# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' }
+# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v512.mir b/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
index dcd05f056949..828a243b2656 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub-v512.mir
@@ -31,9 +31,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -57,9 +57,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -83,9 +83,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -109,9 +109,9 @@ alignment: 4
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: vr512 }
-# ALL-NEXT: - { id: 1, class: vr512 }
-# ALL-NEXT: - { id: 2, class: vr512 }
+# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-sub.mir b/test/CodeGen/X86/GlobalISel/select-sub.mir
index d4db6eec6d80..4768a2d93222 100644
--- a/test/CodeGen/X86/GlobalISel/select-sub.mir
+++ b/test/CodeGen/X86/GlobalISel/select-sub.mir
@@ -40,9 +40,9 @@ name: test_sub_i64
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr64 }
-# ALL-NEXT: - { id: 1, class: gr64 }
-# ALL-NEXT: - { id: 2, class: gr64 }
+# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -66,9 +66,9 @@ name: test_sub_i32
legalized: true
regBankSelected: true
# ALL: registers:
-# ALL-NEXT: - { id: 0, class: gr32 }
-# ALL-NEXT: - { id: 1, class: gr32 }
-# ALL-NEXT: - { id: 2, class: gr32 }
+# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' }
+# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -94,12 +94,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr32 }
-# NO_AVX512F-NEXT: - { id: 1, class: fr32 }
-# NO_AVX512F-NEXT: - { id: 2, class: fr32 }
-# AVX512ALL-NEXT: - { id: 0, class: fr32x }
-# AVX512ALL-NEXT: - { id: 1, class: fr32x }
-# AVX512ALL-NEXT: - { id: 2, class: fr32x }
+# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -128,12 +128,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512F-NEXT: - { id: 0, class: fr64 }
-# NO_AVX512F-NEXT: - { id: 1, class: fr64 }
-# NO_AVX512F-NEXT: - { id: 2, class: fr64 }
-# AVX512ALL-NEXT: - { id: 0, class: fr64x }
-# AVX512ALL-NEXT: - { id: 1, class: fr64x }
-# AVX512ALL-NEXT: - { id: 2, class: fr64x }
+# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' }
+# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' }
+# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -161,12 +161,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
@@ -196,12 +196,12 @@ regBankSelected: true
selected: false
tracksRegLiveness: true
# ALL: registers:
-# NO_AVX512VL-NEXT: - { id: 0, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 1, class: vr128 }
-# NO_AVX512VL-NEXT: - { id: 2, class: vr128 }
-# AVX512VL-NEXT: - { id: 0, class: vr128x }
-# AVX512VL-NEXT: - { id: 1, class: vr128x }
-# AVX512VL-NEXT: - { id: 2, class: vr128x }
+# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' }
+# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' }
+# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' }
registers:
- { id: 0, class: vecr }
- { id: 1, class: vecr }
diff --git a/test/CodeGen/X86/GlobalISel/select-trunc.mir b/test/CodeGen/X86/GlobalISel/select-trunc.mir
index 9b90543d6559..4df585628ddc 100644
--- a/test/CodeGen/X86/GlobalISel/select-trunc.mir
+++ b/test/CodeGen/X86/GlobalISel/select-trunc.mir
@@ -38,8 +38,8 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr8 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -64,8 +64,8 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr8 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -90,8 +90,8 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr32 }
-# CHECK-NEXT: - { id: 1, class: gr16 }
+# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -116,8 +116,8 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64_with_sub_8bit }
-# CHECK-NEXT: - { id: 1, class: gr8 }
+# CHECK-NEXT: - { id: 0, class: gr64_with_sub_8bit, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -142,8 +142,8 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
-# CHECK-NEXT: - { id: 1, class: gr16 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr16, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
@@ -168,8 +168,8 @@ alignment: 4
legalized: true
regBankSelected: true
# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gr64 }
-# CHECK-NEXT: - { id: 1, class: gr32 }
+# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' }
+# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' }
registers:
- { id: 0, class: gpr }
- { id: 1, class: gpr }
diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll
index 874e3e379d8e..5e375cc42e01 100644
--- a/test/CodeGen/X86/O0-pipeline.ll
+++ b/test/CodeGen/X86/O0-pipeline.ll
@@ -5,12 +5,12 @@
; CHECK-LABEL: Pass Arguments:
; CHECK-NEXT: Target Library Information
; CHECK-NEXT: Target Pass Configuration
+; CHECK-NEXT: Machine Module Information
; CHECK-NEXT: Target Transform Information
; CHECK-NEXT: Type-Based Alias Analysis
; CHECK-NEXT: Scoped NoAlias Alias Analysis
; CHECK-NEXT: Assumption Cache Tracker
; CHECK-NEXT: Create Garbage Collector Module Metadata
-; CHECK-NEXT: Machine Module Information
; CHECK-NEXT: Machine Branch Probability Analysis
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: Pre-ISel Intrinsic Lowering
diff --git a/test/CodeGen/X86/atom-fixup-lea3.ll b/test/CodeGen/X86/atom-fixup-lea3.ll
index ed2df277480e..e79d2e69e347 100644
--- a/test/CodeGen/X86/atom-fixup-lea3.ll
+++ b/test/CodeGen/X86/atom-fixup-lea3.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
-; CHECK: addl ([[reg:%[a-z]+]])
-; CHECK-NEXT: addl $4, [[reg]]
+; CHECK: addl ({{%[a-z]+}},[[reg:%[a-z]+]],4)
+; CHECK-NEXT: movl
+; CHECK-NEXT: addl 4({{%[a-z]+}},[[reg:%[a-z]+]],4)
+; CHECK-NEXT: incl
; Test for the FixupLEAs pre-emit pass.
; An LEA should NOT be substituted for the ADD instruction
@@ -20,7 +22,7 @@
; return sum;
;}
-define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %m, i32* nocapture %array2) #0 {
+define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %k, i32* nocapture %l, i32* nocapture %m, i32* nocapture %array2) #0 {
entry:
%cmp7 = icmp sgt i32 %n, 0
br i1 %cmp7, label %for.body.lr.ph, label %for.end
@@ -35,6 +37,9 @@ for.body: ; preds = %for.body, %for.body
%j.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc1, %for.body ]
%inc1 = add nsw i32 %j.09, 1
%arrayidx = getelementptr inbounds i32, i32* %array2, i32 %j.09
+ store i32 %0, i32* %m, align 4
+ store i32 %sum.010, i32* %m, align 4
+ store i32 %0, i32* %m, align 4
%1 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %1
store i32 %add, i32* %m, align 4
diff --git a/test/CodeGen/X86/avx-schedule.ll b/test/CodeGen/X86/avx-schedule.ll
index bb05481e313d..47e95fe31bdf 100644
--- a/test/CodeGen/X86/avx-schedule.ll
+++ b/test/CodeGen/X86/avx-schedule.ll
@@ -910,14 +910,14 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double
;
; BTVER2-LABEL: test_haddpd:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_haddpd:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
%1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
%2 = load <4 x double>, <4 x double> *%a2, align 32
@@ -941,14 +941,14 @@ define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%
;
; BTVER2-LABEL: test_haddps:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_haddps:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
%1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
%2 = load <8 x float>, <8 x float> *%a2, align 32
@@ -972,14 +972,14 @@ define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double
;
; BTVER2-LABEL: test_hsubpd:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_hsubpd:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
%1 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
%2 = load <4 x double>, <4 x double> *%a2, align 32
@@ -1003,14 +1003,14 @@ define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%
;
; BTVER2-LABEL: test_hsubps:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_hsubps:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
+; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
+; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; ZNVER1-NEXT: retq # sched: [4:1.00]
%1 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
%2 = load <8 x float>, <8 x float> *%a2, align 32
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index 1914b5134bee..91d1f64c6706 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
@@ -14,7 +14,7 @@ entry:
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
@@ -26,7 +26,7 @@ entry:
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vmovq %rdi, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
@@ -41,7 +41,7 @@ entry:
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcD:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
@@ -58,20 +58,20 @@ entry:
;
define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
-; CHECK: ## BB#0: ## %for_exit499
+; CHECK: # BB#0: # %for_exit499
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: ## implicit-def: %YMM0
+; CHECK-NEXT: # implicit-def: %YMM0
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: jne LBB4_2
-; CHECK-NEXT: ## BB#1: ## %load.i1247
+; CHECK-NEXT: jne .LBB4_2
+; CHECK-NEXT: # BB#1: # %load.i1247
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: andq $-32, %rsp
-; CHECK-NEXT: subq $1312, %rsp ## imm = 0x520
+; CHECK-NEXT: subq $1312, %rsp # imm = 0x520
; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
-; CHECK-NEXT: LBB4_2: ## %__load_and_broadcast_32.exit1249
+; CHECK-NEXT: .LBB4_2: # %__load_and_broadcast_32.exit1249
; CHECK-NEXT: retq
allocas:
%udx495 = alloca [18 x [18 x float]], align 32
@@ -99,7 +99,7 @@ __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_ex
define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vmovd %edi, %xmm0
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
@@ -112,7 +112,7 @@ define <8 x float> @funcF(i32 %val) nounwind {
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: retq
@@ -123,7 +123,7 @@ entry:
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
-; CHECK: ## BB#0: ## %entry
+; CHECK: # BB#0: # %entry
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; CHECK-NEXT: retq
@@ -134,7 +134,7 @@ entry:
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; CHECK-LABEL: splat_load_2f64_11:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT: retq
%x = load <2 x double>, <2 x double>* %ptr
@@ -144,7 +144,7 @@ define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT: retq
%x = load <4 x double>, <4 x double>* %ptr
@@ -154,7 +154,7 @@ define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
; CHECK-NEXT: retq
%x = load <4 x float>, <4 x float>* %ptr
@@ -164,7 +164,7 @@ define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
; CHECK-LABEL: splat_load_8f32_77777777:
-; CHECK: ## BB#0:
+; CHECK: # BB#0:
; CHECK-NEXT: vbroadcastss 28(%rdi), %ymm0
; CHECK-NEXT: retq
%x = load <8 x float>, <8 x float>* %ptr
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 8f6afa8785d0..140299f5495d 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -1549,8 +1549,6 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
; NOVL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; NOVL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpextrb $8, %xmm0, %eax
; NOVL-NEXT: andl $1, %eax
@@ -1579,8 +1577,6 @@ define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
; NOVL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; NOVL-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; NOVL-NEXT: retq
diff --git a/test/CodeGen/X86/build-vector-128.ll b/test/CodeGen/X86/build-vector-128.ll
index 8c3a6790ffa6..c73d7654045e 100644
--- a/test/CodeGen/X86/build-vector-128.ll
+++ b/test/CodeGen/X86/build-vector-128.ll
@@ -41,9 +41,9 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa
;
; SSE2-64-LABEL: test_buildvector_v4f32:
; SSE2-64: # BB#0:
-; SSE2-64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-64-NEXT: retq
;
; SSE41-64-LABEL: test_buildvector_v4f32:
@@ -74,13 +74,9 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa
define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) {
; SSE2-32-LABEL: test_buildvector_v2i64:
; SSE2-32: # BB#0:
-; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2-32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-32-NEXT: retl
;
; SSE-64-LABEL: test_buildvector_v2i64:
@@ -126,12 +122,12 @@ define <4 x i32> @test_buildvector_v4i32(i32 %f0, i32 %f1, i32 %f2, i32 %f3) {
; SSE2-64-LABEL: test_buildvector_v4i32:
; SSE2-64: # BB#0:
; SSE2-64-NEXT: movd %ecx, %xmm0
-; SSE2-64-NEXT: movd %esi, %xmm1
+; SSE2-64-NEXT: movd %edx, %xmm1
; SSE2-64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-64-NEXT: movd %edx, %xmm2
+; SSE2-64-NEXT: movd %esi, %xmm2
; SSE2-64-NEXT: movd %edi, %xmm0
; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-64-NEXT: retq
;
; SSE41-64-LABEL: test_buildvector_v4i32:
@@ -170,34 +166,34 @@ define <8 x i16> @test_buildvector_v8i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-32-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-32-NEXT: retl
;
; SSE2-64-LABEL: test_buildvector_v8i16:
; SSE2-64: # BB#0:
-; SSE2-64-NEXT: movd %ecx, %xmm0
+; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-64-NEXT: movd %r9d, %xmm1
-; SSE2-64-NEXT: movd %esi, %xmm2
-; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-64-NEXT: movd %r9d, %xmm0
+; SSE2-64-NEXT: movd %r8d, %xmm2
; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-64-NEXT: movd %ecx, %xmm0
; SSE2-64-NEXT: movd %edx, %xmm1
-; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-64-NEXT: movd %r8d, %xmm3
+; SSE2-64-NEXT: movd %esi, %xmm3
; SSE2-64-NEXT: movd %edi, %xmm0
; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-64-NEXT: retq
;
; SSE41-32-LABEL: test_buildvector_v8i16:
@@ -267,31 +263,31 @@ define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-32-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-32-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE2-32-NEXT: retl
;
; SSE2-64-LABEL: test_buildvector_v16i8:
@@ -299,34 +295,34 @@ define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-64-NEXT: movd %ecx, %xmm0
-; SSE2-64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-64-NEXT: movd %r9d, %xmm1
+; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-64-NEXT: movd %esi, %xmm2
-; SSE2-64-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-64-NEXT: movd %edx, %xmm3
; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-64-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-64-NEXT: movd %r8d, %xmm1
+; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-64-NEXT: movd %r9d, %xmm0
+; SSE2-64-NEXT: movd %r8d, %xmm2
+; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-64-NEXT: movd %ecx, %xmm0
+; SSE2-64-NEXT: movd %edx, %xmm1
; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-64-NEXT: movd %esi, %xmm4
; SSE2-64-NEXT: movd %edi, %xmm0
-; SSE2-64-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE2-64-NEXT: retq
;
; SSE41-32-LABEL: test_buildvector_v16i8:
diff --git a/test/CodeGen/X86/buildvec-insertvec.ll b/test/CodeGen/X86/buildvec-insertvec.ll
index 730376acdc93..cd5abc1373b9 100644
--- a/test/CodeGen/X86/buildvec-insertvec.ll
+++ b/test/CodeGen/X86/buildvec-insertvec.ll
@@ -75,9 +75,9 @@ entry:
define <4 x float> @test_buildvector_v4f32_register(float %f0, float %f1, float %f2, float %f3) {
; SSE2-LABEL: test_buildvector_v4f32_register:
; SSE2: # BB#0:
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4f32_register:
@@ -102,7 +102,7 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* %
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4f32_load:
@@ -126,10 +126,10 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* %
define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, float %f2, float* %p3) {
; SSE2-LABEL: test_buildvector_v4f32_partial_load:
; SSE2: # BB#0:
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4f32_partial_load:
@@ -150,12 +150,12 @@ define <4 x i32> @test_buildvector_v4i32_register(i32 %a0, i32 %a1, i32 %a2, i32
; SSE2-LABEL: test_buildvector_v4i32_register:
; SSE2: # BB#0:
; SSE2-NEXT: movd %ecx, %xmm0
-; SSE2-NEXT: movd %esi, %xmm1
+; SSE2-NEXT: movd %edx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: movd %edx, %xmm2
+; SSE2-NEXT: movd %esi, %xmm2
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4i32_register:
@@ -178,7 +178,7 @@ define <4 x i32> @test_buildvector_v4i32_partial(i32 %a0, i32 %a3) {
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: movd %esi, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v4i32_partial:
@@ -228,21 +228,21 @@ define <4 x i32> @test_buildvector_v4i32_register_zero_2(i32 %a1, i32 %a2, i32 %
define <8 x i16> @test_buildvector_v8i16_register(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) {
; SSE2-LABEL: test_buildvector_v8i16_register:
; SSE2: # BB#0:
-; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: movd %r9d, %xmm1
-; SSE2-NEXT: movd %esi, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: movd %r9d, %xmm0
+; SSE2-NEXT: movd %r8d, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movd %edx, %xmm1
-; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movd %r8d, %xmm3
+; SSE2-NEXT: movd %esi, %xmm3
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v8i16_register:
@@ -333,34 +333,34 @@ define <16 x i8> @test_buildvector_v16i8_register(i8 %a0, i8 %a1, i8 %a2, i8 %a3
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: movd %ecx, %xmm0
-; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: movd %r9d, %xmm1
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-NEXT: movd %esi, %xmm2
-; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: movd %edx, %xmm3
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-NEXT: movd %r8d, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movd %r9d, %xmm0
+; SSE2-NEXT: movd %r8d, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: movd %esi, %xmm4
; SSE2-NEXT: movd %edi, %xmm0
-; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_buildvector_v16i8_register:
diff --git a/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/test/CodeGen/X86/clear_upper_vector_element_bits.ll
index 1218b68b1be4..f6d816ec8919 100644
--- a/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -159,28 +159,7 @@ define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16a:
; SSE: # BB#0:
-; SSE-NEXT: pextrw $1, %xmm0, %eax
-; SSE-NEXT: pextrw $2, %xmm0, %r9d
-; SSE-NEXT: pextrw $3, %xmm0, %edx
-; SSE-NEXT: pextrw $4, %xmm0, %r8d
-; SSE-NEXT: pextrw $5, %xmm0, %edi
-; SSE-NEXT: pextrw $6, %xmm0, %esi
-; SSE-NEXT: pextrw $7, %xmm0, %ecx
-; SSE-NEXT: movd %ecx, %xmm1
-; SSE-NEXT: movd %edx, %xmm2
-; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE-NEXT: movd %edi, %xmm1
-; SSE-NEXT: movd %eax, %xmm3
-; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE-NEXT: movd %esi, %xmm1
-; SSE-NEXT: movd %r9d, %xmm2
-; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE-NEXT: movd %r8d, %xmm1
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper8xi16a:
@@ -225,61 +204,9 @@ define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16a:
; SSE: # BB#0:
-; SSE-NEXT: pushq %rbp
-; SSE-NEXT: pushq %r15
-; SSE-NEXT: pushq %r14
-; SSE-NEXT: pushq %r12
-; SSE-NEXT: pushq %rbx
-; SSE-NEXT: pextrw $1, %xmm0, %edi
-; SSE-NEXT: pextrw $2, %xmm0, %eax
-; SSE-NEXT: pextrw $3, %xmm0, %ecx
-; SSE-NEXT: pextrw $4, %xmm0, %edx
-; SSE-NEXT: pextrw $5, %xmm0, %esi
-; SSE-NEXT: pextrw $6, %xmm0, %ebx
-; SSE-NEXT: pextrw $7, %xmm0, %ebp
-; SSE-NEXT: pextrw $1, %xmm1, %r10d
-; SSE-NEXT: pextrw $2, %xmm1, %r9d
-; SSE-NEXT: pextrw $3, %xmm1, %r14d
-; SSE-NEXT: pextrw $4, %xmm1, %r8d
-; SSE-NEXT: pextrw $5, %xmm1, %r15d
-; SSE-NEXT: pextrw $6, %xmm1, %r11d
-; SSE-NEXT: pextrw $7, %xmm1, %r12d
-; SSE-NEXT: movd %ebp, %xmm2
-; SSE-NEXT: movd %ecx, %xmm3
-; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE-NEXT: movd %esi, %xmm2
-; SSE-NEXT: movd %edi, %xmm4
-; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE-NEXT: movd %ebx, %xmm2
-; SSE-NEXT: movd %eax, %xmm3
-; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE-NEXT: movd %edx, %xmm2
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: movd %r12d, %xmm3
-; SSE-NEXT: movd %r14d, %xmm4
-; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE-NEXT: movd %r15d, %xmm3
-; SSE-NEXT: movd %r10d, %xmm5
-; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
-; SSE-NEXT: movd %r11d, %xmm3
-; SSE-NEXT: movd %r9d, %xmm4
-; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE-NEXT: movd %r8d, %xmm3
-; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
-; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
-; SSE-NEXT: pand %xmm2, %xmm1
-; SSE-NEXT: popq %rbx
-; SSE-NEXT: popq %r12
-; SSE-NEXT: popq %r14
-; SSE-NEXT: popq %r15
-; SSE-NEXT: popq %rbp
+; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE-NEXT: andps %xmm2, %xmm0
+; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi16a:
@@ -364,10 +291,9 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
@@ -375,31 +301,32 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm1
+; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm3
-; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
-; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
@@ -486,10 +413,9 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
@@ -497,31 +423,32 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm1
+; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm3
-; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
+; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
-; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
@@ -531,10 +458,9 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm4
+; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
@@ -542,31 +468,32 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm3
+; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
-; SSE-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
-; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3],xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
+; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: movd %eax, %xmm5
-; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
+; SSE-NEXT: movd %eax, %xmm1
+; SSE-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
+; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
+; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm6
-; SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: retq
;
diff --git a/test/CodeGen/X86/fast-isel-nontemporal.ll b/test/CodeGen/X86/fast-isel-nontemporal.ll
index 4140721bd5f3..33d001cdc216 100644
--- a/test/CodeGen/X86/fast-isel-nontemporal.ll
+++ b/test/CodeGen/X86/fast-isel-nontemporal.ll
@@ -545,7 +545,11 @@ define <8 x float> @test_load_nt8xfloat(<8 x float>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt8xfloat:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xfloat:
@@ -583,7 +587,11 @@ define <4 x double> @test_load_nt4xdouble(<4 x double>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt4xdouble:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovapd (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt4xdouble:
@@ -621,7 +629,11 @@ define <32 x i8> @test_load_nt32xi8(<32 x i8>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt32xi8:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt32xi8:
@@ -659,7 +671,11 @@ define <16 x i16> @test_load_nt16xi16(<16 x i16>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt16xi16:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt16xi16:
@@ -697,7 +713,11 @@ define <8 x i32> @test_load_nt8xi32(<8 x i32>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt8xi32:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xi32:
@@ -735,7 +755,11 @@ define <4 x i64> @test_load_nt4xi64(<4 x i64>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt4xi64:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt4xi64:
@@ -957,8 +981,16 @@ define <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt16xfloat:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt16xfloat:
@@ -1003,8 +1035,16 @@ define <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt8xdouble:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovapd (%rdi), %ymm0
-; AVX1-NEXT: vmovapd 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xdouble:
@@ -1049,8 +1089,16 @@ define <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt64xi8:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt64xi8:
@@ -1101,8 +1149,16 @@ define <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt32xi16:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt32xi16:
@@ -1153,8 +1209,16 @@ define <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt16xi32:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt16xi32:
@@ -1199,8 +1263,16 @@ define <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) {
;
; AVX1-LABEL: test_load_nt8xi64:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: # implicit-def: %YMM1
+; AVX1-NEXT: vmovaps %xmm2, %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_load_nt8xi64:
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index 85b2b41fa191..068480873c23 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -1,16 +1,10 @@
; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s
-; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s
+; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck %s
define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
-; ATOM: foo
-; ATOM: addl
-; ATOM: addl
-; ATOM: leal
; CHECK: foo
-; CHECK: addl
-; CHECK: addl
-; CHECK: addl
+; CHECK: incl
entry:
%0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/haddsub-2.ll b/test/CodeGen/X86/haddsub-2.ll
index 4596b83f7bc2..fd023d018031 100644
--- a/test/CodeGen/X86/haddsub-2.ll
+++ b/test/CodeGen/X86/haddsub-2.ll
@@ -142,12 +142,12 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
; SSE3-NEXT: movd %xmm0, %edi
; SSE3-NEXT: addl %eax, %edi
; SSE3-NEXT: movd %edi, %xmm0
-; SSE3-NEXT: movd %edx, %xmm1
+; SSE3-NEXT: movd %esi, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT: movd %esi, %xmm2
+; SSE3-NEXT: movd %edx, %xmm2
; SSE3-NEXT: movd %ecx, %xmm0
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: phadd_d_test1:
@@ -196,16 +196,16 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
; SSE3-NEXT: movd %xmm0, %esi
; SSE3-NEXT: addl %eax, %esi
; SSE3-NEXT: movd %esi, %xmm0
+; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,2,3]
+; SSE3-NEXT: movd %xmm2, %eax
+; SSE3-NEXT: movd %xmm1, %esi
+; SSE3-NEXT: addl %eax, %esi
+; SSE3-NEXT: movd %esi, %xmm1
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT: movd %ecx, %xmm2
-; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT: movd %xmm0, %eax
-; SSE3-NEXT: movd %xmm1, %ecx
-; SSE3-NEXT: addl %eax, %ecx
-; SSE3-NEXT: movd %ecx, %xmm1
; SSE3-NEXT: movd %edx, %xmm0
-; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: phadd_d_test2:
@@ -258,12 +258,12 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
; SSE3-NEXT: movd %xmm0, %edi
; SSE3-NEXT: subl %edi, %esi
; SSE3-NEXT: movd %esi, %xmm0
-; SSE3-NEXT: movd %ecx, %xmm1
+; SSE3-NEXT: movd %edx, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT: movd %edx, %xmm2
+; SSE3-NEXT: movd %ecx, %xmm2
; SSE3-NEXT: movd %eax, %xmm0
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: phsub_d_test1:
@@ -312,16 +312,16 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
; SSE3-NEXT: movd %xmm0, %esi
; SSE3-NEXT: subl %esi, %edx
; SSE3-NEXT: movd %edx, %xmm0
+; SSE3-NEXT: movd %xmm1, %edx
+; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSE3-NEXT: movd %xmm1, %esi
+; SSE3-NEXT: subl %esi, %edx
+; SSE3-NEXT: movd %edx, %xmm1
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT: movd %eax, %xmm2
-; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE3-NEXT: movd %xmm1, %eax
-; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT: movd %xmm0, %edx
-; SSE3-NEXT: subl %edx, %eax
-; SSE3-NEXT: movd %eax, %xmm1
; SSE3-NEXT: movd %ecx, %xmm0
-; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: phsub_d_test2:
@@ -518,19 +518,19 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-NEXT: movd %xmm0, %r9d
; SSE3-NEXT: addl %edx, %r9d
-; SSE3-NEXT: movd %xmm1, %esi
+; SSE3-NEXT: movd %xmm1, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT: movd %xmm0, %r10d
-; SSE3-NEXT: addl %esi, %r10d
-; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT: movd %xmm0, %esi
+; SSE3-NEXT: addl %edx, %esi
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE3-NEXT: movd %xmm0, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-NEXT: movd %xmm0, %edi
-; SSE3-NEXT: addl %esi, %edi
+; SSE3-NEXT: addl %edx, %edi
; SSE3-NEXT: movd %xmm2, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE3-NEXT: movd %xmm0, %r11d
-; SSE3-NEXT: addl %eax, %r11d
+; SSE3-NEXT: movd %xmm0, %r10d
+; SSE3-NEXT: addl %eax, %r10d
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE3-NEXT: movd %xmm0, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
@@ -541,24 +541,24 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
; SSE3-NEXT: movd %xmm0, %edx
; SSE3-NEXT: addl %eax, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
-; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: movd %xmm0, %r11d
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3]
-; SSE3-NEXT: movd %xmm0, %esi
-; SSE3-NEXT: addl %eax, %esi
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: addl %r11d, %eax
; SSE3-NEXT: movd %edi, %xmm0
-; SSE3-NEXT: movd %r9d, %xmm1
+; SSE3-NEXT: movd %esi, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT: movd %r10d, %xmm2
+; SSE3-NEXT: movd %r9d, %xmm2
; SSE3-NEXT: movd %r8d, %xmm0
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE3-NEXT: movd %esi, %xmm1
-; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: movd %edx, %xmm2
; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE3-NEXT: movd %edx, %xmm3
-; SSE3-NEXT: movd %r11d, %xmm1
+; SSE3-NEXT: movd %ecx, %xmm3
+; SSE3-NEXT: movd %r10d, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: avx2_vphadd_d_test:
@@ -658,83 +658,83 @@ define <16 x i16> @avx2_vphadd_w_test(<16 x i16> %a, <16 x i16> %b) {
; SSE3-NEXT: addl %eax, %ecx
; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
; SSE3-NEXT: pextrw $2, %xmm0, %eax
-; SSE3-NEXT: pextrw $3, %xmm0, %r11d
-; SSE3-NEXT: addl %eax, %r11d
+; SSE3-NEXT: pextrw $3, %xmm0, %ecx
+; SSE3-NEXT: addl %eax, %ecx
+; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
; SSE3-NEXT: pextrw $4, %xmm0, %eax
-; SSE3-NEXT: pextrw $5, %xmm0, %r10d
-; SSE3-NEXT: addl %eax, %r10d
+; SSE3-NEXT: pextrw $5, %xmm0, %r11d
+; SSE3-NEXT: addl %eax, %r11d
; SSE3-NEXT: pextrw $6, %xmm0, %eax
-; SSE3-NEXT: pextrw $7, %xmm0, %r13d
-; SSE3-NEXT: addl %eax, %r13d
+; SSE3-NEXT: pextrw $7, %xmm0, %r15d
+; SSE3-NEXT: addl %eax, %r15d
; SSE3-NEXT: movd %xmm1, %eax
-; SSE3-NEXT: pextrw $1, %xmm1, %r14d
-; SSE3-NEXT: addl %eax, %r14d
+; SSE3-NEXT: pextrw $1, %xmm1, %r13d
+; SSE3-NEXT: addl %eax, %r13d
; SSE3-NEXT: pextrw $2, %xmm1, %eax
-; SSE3-NEXT: pextrw $3, %xmm1, %ebp
-; SSE3-NEXT: addl %eax, %ebp
-; SSE3-NEXT: pextrw $4, %xmm1, %eax
-; SSE3-NEXT: pextrw $5, %xmm1, %ebx
+; SSE3-NEXT: pextrw $3, %xmm1, %ebx
; SSE3-NEXT: addl %eax, %ebx
+; SSE3-NEXT: pextrw $4, %xmm1, %eax
+; SSE3-NEXT: pextrw $5, %xmm1, %r8d
+; SSE3-NEXT: addl %eax, %r8d
; SSE3-NEXT: pextrw $6, %xmm1, %eax
-; SSE3-NEXT: pextrw $7, %xmm1, %edx
-; SSE3-NEXT: addl %eax, %edx
+; SSE3-NEXT: pextrw $7, %xmm1, %esi
+; SSE3-NEXT: addl %eax, %esi
; SSE3-NEXT: movd %xmm2, %eax
-; SSE3-NEXT: pextrw $1, %xmm2, %ecx
-; SSE3-NEXT: addl %eax, %ecx
-; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; SSE3-NEXT: pextrw $1, %xmm2, %r10d
+; SSE3-NEXT: addl %eax, %r10d
; SSE3-NEXT: pextrw $2, %xmm2, %eax
-; SSE3-NEXT: pextrw $3, %xmm2, %r12d
-; SSE3-NEXT: addl %eax, %r12d
+; SSE3-NEXT: pextrw $3, %xmm2, %r14d
+; SSE3-NEXT: addl %eax, %r14d
; SSE3-NEXT: pextrw $4, %xmm2, %eax
-; SSE3-NEXT: pextrw $5, %xmm2, %r15d
-; SSE3-NEXT: addl %eax, %r15d
+; SSE3-NEXT: pextrw $5, %xmm2, %r12d
+; SSE3-NEXT: addl %eax, %r12d
; SSE3-NEXT: pextrw $6, %xmm2, %eax
-; SSE3-NEXT: pextrw $7, %xmm2, %r8d
-; SSE3-NEXT: addl %eax, %r8d
-; SSE3-NEXT: movd %xmm3, %eax
-; SSE3-NEXT: pextrw $1, %xmm3, %r9d
+; SSE3-NEXT: pextrw $7, %xmm2, %r9d
; SSE3-NEXT: addl %eax, %r9d
-; SSE3-NEXT: pextrw $2, %xmm3, %eax
-; SSE3-NEXT: pextrw $3, %xmm3, %esi
-; SSE3-NEXT: addl %eax, %esi
-; SSE3-NEXT: pextrw $4, %xmm3, %eax
-; SSE3-NEXT: pextrw $5, %xmm3, %edi
-; SSE3-NEXT: addl %eax, %edi
-; SSE3-NEXT: pextrw $6, %xmm3, %ecx
+; SSE3-NEXT: movd %xmm3, %eax
+; SSE3-NEXT: pextrw $1, %xmm3, %ebp
+; SSE3-NEXT: addl %eax, %ebp
+; SSE3-NEXT: pextrw $2, %xmm3, %edx
+; SSE3-NEXT: pextrw $3, %xmm3, %edi
+; SSE3-NEXT: addl %edx, %edi
+; SSE3-NEXT: pextrw $4, %xmm3, %edx
+; SSE3-NEXT: pextrw $5, %xmm3, %ecx
+; SSE3-NEXT: addl %edx, %ecx
+; SSE3-NEXT: pextrw $6, %xmm3, %edx
; SSE3-NEXT: pextrw $7, %xmm3, %eax
-; SSE3-NEXT: addl %ecx, %eax
-; SSE3-NEXT: movd %edx, %xmm8
-; SSE3-NEXT: movd %r13d, %xmm3
-; SSE3-NEXT: movd %ebp, %xmm9
-; SSE3-NEXT: movd %r11d, %xmm4
-; SSE3-NEXT: movd %ebx, %xmm10
-; SSE3-NEXT: movd %r10d, %xmm7
-; SSE3-NEXT: movd %r14d, %xmm11
+; SSE3-NEXT: addl %edx, %eax
+; SSE3-NEXT: movd %esi, %xmm8
+; SSE3-NEXT: movd %r8d, %xmm3
+; SSE3-NEXT: movd %ebx, %xmm9
+; SSE3-NEXT: movd %r13d, %xmm4
+; SSE3-NEXT: movd %r15d, %xmm10
+; SSE3-NEXT: movd %r11d, %xmm7
+; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm11 # 4-byte Folded Reload
+; SSE3-NEXT: # xmm11 = mem[0],zero,zero,zero
; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
; SSE3-NEXT: # xmm0 = mem[0],zero,zero,zero
; SSE3-NEXT: movd %eax, %xmm12
-; SSE3-NEXT: movd %r8d, %xmm6
-; SSE3-NEXT: movd %esi, %xmm13
-; SSE3-NEXT: movd %r12d, %xmm5
-; SSE3-NEXT: movd %edi, %xmm14
-; SSE3-NEXT: movd %r15d, %xmm2
-; SSE3-NEXT: movd %r9d, %xmm15
-; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
-; SSE3-NEXT: # xmm1 = mem[0],zero,zero,zero
+; SSE3-NEXT: movd %ecx, %xmm6
+; SSE3-NEXT: movd %edi, %xmm13
+; SSE3-NEXT: movd %ebp, %xmm5
+; SSE3-NEXT: movd %r9d, %xmm14
+; SSE3-NEXT: movd %r12d, %xmm2
+; SSE3-NEXT: movd %r14d, %xmm15
+; SSE3-NEXT: movd %r10d, %xmm1
; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; SSE3-NEXT: popq %rbx
; SSE3-NEXT: popq %r12
; SSE3-NEXT: popq %r13
@@ -858,12 +858,12 @@ define <4 x i32> @not_a_hsub_1(<4 x i32> %A, <4 x i32> %B) {
; SSE-NEXT: movd %xmm0, %edi
; SSE-NEXT: subl %edi, %esi
; SSE-NEXT: movd %esi, %xmm0
-; SSE-NEXT: movd %ecx, %xmm1
+; SSE-NEXT: movd %edx, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT: movd %edx, %xmm2
+; SSE-NEXT: movd %ecx, %xmm2
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: not_a_hsub_1:
@@ -919,11 +919,11 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: movaps %xmm1, %xmm4
; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1]
; SSE-NEXT: subss %xmm4, %xmm3
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; SSE-NEXT: subss %xmm3, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; SSE-NEXT: subss %xmm4, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: not_a_hsub_2:
@@ -1162,19 +1162,19 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-NEXT: movd %xmm0, %r9d
; SSE3-NEXT: addl %edx, %r9d
-; SSE3-NEXT: movd %xmm2, %esi
+; SSE3-NEXT: movd %xmm2, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
-; SSE3-NEXT: movd %xmm0, %r10d
-; SSE3-NEXT: addl %esi, %r10d
-; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE3-NEXT: movd %xmm0, %esi
+; SSE3-NEXT: addl %edx, %esi
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
+; SSE3-NEXT: movd %xmm0, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
; SSE3-NEXT: movd %xmm0, %edi
-; SSE3-NEXT: addl %esi, %edi
+; SSE3-NEXT: addl %edx, %edi
; SSE3-NEXT: movd %xmm1, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE3-NEXT: movd %xmm0, %r11d
-; SSE3-NEXT: addl %eax, %r11d
+; SSE3-NEXT: movd %xmm0, %r10d
+; SSE3-NEXT: addl %eax, %r10d
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT: movd %xmm0, %eax
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
@@ -1185,24 +1185,24 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
; SSE3-NEXT: movd %xmm0, %edx
; SSE3-NEXT: addl %eax, %edx
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
-; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: movd %xmm0, %r11d
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3]
-; SSE3-NEXT: movd %xmm0, %esi
-; SSE3-NEXT: addl %eax, %esi
+; SSE3-NEXT: movd %xmm0, %eax
+; SSE3-NEXT: addl %r11d, %eax
; SSE3-NEXT: movd %edi, %xmm0
-; SSE3-NEXT: movd %r9d, %xmm1
+; SSE3-NEXT: movd %esi, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE3-NEXT: movd %r10d, %xmm2
+; SSE3-NEXT: movd %r9d, %xmm2
; SSE3-NEXT: movd %r8d, %xmm0
; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE3-NEXT: movd %esi, %xmm1
-; SSE3-NEXT: movd %ecx, %xmm2
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE3-NEXT: movd %eax, %xmm1
+; SSE3-NEXT: movd %edx, %xmm2
; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE3-NEXT: movd %edx, %xmm3
-; SSE3-NEXT: movd %r11d, %xmm1
+; SSE3-NEXT: movd %ecx, %xmm3
+; SSE3-NEXT: movd %r10d, %xmm1
; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: avx2_hadd_d:
@@ -1293,15 +1293,14 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) {
; SSE3-NEXT: .Lcfi23:
; SSE3-NEXT: .cfi_offset %rbp, -16
; SSE3-NEXT: movd %xmm0, %eax
-; SSE3-NEXT: pextrw $1, %xmm0, %ecx
-; SSE3-NEXT: addl %eax, %ecx
-; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; SSE3-NEXT: pextrw $1, %xmm0, %r10d
+; SSE3-NEXT: addl %eax, %r10d
; SSE3-NEXT: pextrw $2, %xmm0, %eax
-; SSE3-NEXT: pextrw $3, %xmm0, %r15d
-; SSE3-NEXT: addl %eax, %r15d
+; SSE3-NEXT: pextrw $3, %xmm0, %r11d
+; SSE3-NEXT: addl %eax, %r11d
; SSE3-NEXT: pextrw $4, %xmm0, %eax
-; SSE3-NEXT: pextrw $5, %xmm0, %r14d
-; SSE3-NEXT: addl %eax, %r14d
+; SSE3-NEXT: pextrw $5, %xmm0, %r12d
+; SSE3-NEXT: addl %eax, %r12d
; SSE3-NEXT: pextrw $6, %xmm0, %eax
; SSE3-NEXT: pextrw $7, %xmm0, %r13d
; SSE3-NEXT: addl %eax, %r13d
@@ -1310,70 +1309,71 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) {
; SSE3-NEXT: addl %eax, %ecx
; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
; SSE3-NEXT: pextrw $2, %xmm1, %eax
-; SSE3-NEXT: pextrw $3, %xmm1, %r11d
-; SSE3-NEXT: addl %eax, %r11d
-; SSE3-NEXT: pextrw $4, %xmm1, %eax
-; SSE3-NEXT: pextrw $5, %xmm1, %r10d
-; SSE3-NEXT: addl %eax, %r10d
-; SSE3-NEXT: pextrw $6, %xmm1, %eax
-; SSE3-NEXT: pextrw $7, %xmm1, %r12d
-; SSE3-NEXT: addl %eax, %r12d
-; SSE3-NEXT: movd %xmm2, %eax
-; SSE3-NEXT: pextrw $1, %xmm2, %ebx
-; SSE3-NEXT: addl %eax, %ebx
-; SSE3-NEXT: pextrw $2, %xmm2, %eax
-; SSE3-NEXT: pextrw $3, %xmm2, %ecx
+; SSE3-NEXT: pextrw $3, %xmm1, %ecx
; SSE3-NEXT: addl %eax, %ecx
+; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; SSE3-NEXT: pextrw $4, %xmm1, %eax
+; SSE3-NEXT: pextrw $5, %xmm1, %r14d
+; SSE3-NEXT: addl %eax, %r14d
+; SSE3-NEXT: pextrw $6, %xmm1, %esi
+; SSE3-NEXT: pextrw $7, %xmm1, %r15d
+; SSE3-NEXT: addl %esi, %r15d
+; SSE3-NEXT: movd %xmm2, %esi
+; SSE3-NEXT: pextrw $1, %xmm2, %ebp
+; SSE3-NEXT: addl %esi, %ebp
+; SSE3-NEXT: pextrw $2, %xmm2, %esi
+; SSE3-NEXT: pextrw $3, %xmm2, %edi
+; SSE3-NEXT: addl %esi, %edi
; SSE3-NEXT: pextrw $4, %xmm2, %esi
-; SSE3-NEXT: pextrw $5, %xmm2, %r8d
-; SSE3-NEXT: addl %esi, %r8d
+; SSE3-NEXT: pextrw $5, %xmm2, %eax
+; SSE3-NEXT: addl %esi, %eax
; SSE3-NEXT: pextrw $6, %xmm2, %esi
-; SSE3-NEXT: pextrw $7, %xmm2, %edx
-; SSE3-NEXT: addl %esi, %edx
-; SSE3-NEXT: movd %xmm3, %edi
+; SSE3-NEXT: pextrw $7, %xmm2, %ecx
+; SSE3-NEXT: addl %esi, %ecx
+; SSE3-NEXT: movd %xmm3, %ebx
; SSE3-NEXT: pextrw $1, %xmm3, %r9d
-; SSE3-NEXT: addl %edi, %r9d
-; SSE3-NEXT: pextrw $2, %xmm3, %ebp
-; SSE3-NEXT: pextrw $3, %xmm3, %edi
-; SSE3-NEXT: addl %ebp, %edi
-; SSE3-NEXT: pextrw $4, %xmm3, %eax
-; SSE3-NEXT: pextrw $5, %xmm3, %ebp
-; SSE3-NEXT: addl %eax, %ebp
-; SSE3-NEXT: pextrw $6, %xmm3, %esi
-; SSE3-NEXT: pextrw $7, %xmm3, %eax
-; SSE3-NEXT: addl %esi, %eax
-; SSE3-NEXT: movd %edx, %xmm8
-; SSE3-NEXT: movd %r13d, %xmm3
-; SSE3-NEXT: movd %ecx, %xmm9
-; SSE3-NEXT: movd %r15d, %xmm4
-; SSE3-NEXT: movd %r8d, %xmm10
-; SSE3-NEXT: movd %r14d, %xmm7
-; SSE3-NEXT: movd %ebx, %xmm11
-; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
-; SSE3-NEXT: # xmm0 = mem[0],zero,zero,zero
-; SSE3-NEXT: movd %eax, %xmm12
-; SSE3-NEXT: movd %r12d, %xmm6
-; SSE3-NEXT: movd %edi, %xmm13
-; SSE3-NEXT: movd %r11d, %xmm5
-; SSE3-NEXT: movd %ebp, %xmm14
-; SSE3-NEXT: movd %r10d, %xmm2
-; SSE3-NEXT: movd %r9d, %xmm15
+; SSE3-NEXT: addl %ebx, %r9d
+; SSE3-NEXT: pextrw $2, %xmm3, %edx
+; SSE3-NEXT: pextrw $3, %xmm3, %ebx
+; SSE3-NEXT: addl %edx, %ebx
+; SSE3-NEXT: pextrw $4, %xmm3, %edx
+; SSE3-NEXT: pextrw $5, %xmm3, %esi
+; SSE3-NEXT: addl %edx, %esi
+; SSE3-NEXT: pextrw $6, %xmm3, %r8d
+; SSE3-NEXT: pextrw $7, %xmm3, %edx
+; SSE3-NEXT: addl %r8d, %edx
+; SSE3-NEXT: movd %ecx, %xmm8
+; SSE3-NEXT: movd %eax, %xmm3
+; SSE3-NEXT: movd %edi, %xmm9
+; SSE3-NEXT: movd %ebp, %xmm4
+; SSE3-NEXT: movd %r13d, %xmm10
+; SSE3-NEXT: movd %r12d, %xmm7
+; SSE3-NEXT: movd %r11d, %xmm11
+; SSE3-NEXT: movd %r10d, %xmm0
+; SSE3-NEXT: movd %edx, %xmm12
+; SSE3-NEXT: movd %esi, %xmm6
+; SSE3-NEXT: movd %ebx, %xmm13
+; SSE3-NEXT: movd %r9d, %xmm5
+; SSE3-NEXT: movd %r15d, %xmm14
+; SSE3-NEXT: movd %r14d, %xmm2
+; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm15 # 4-byte Folded Reload
+; SSE3-NEXT: # xmm15 = mem[0],zero,zero,zero
; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
; SSE3-NEXT: # xmm1 = mem[0],zero,zero,zero
; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3]
; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; SSE3-NEXT: popq %rbx
; SSE3-NEXT: popq %r12
; SSE3-NEXT: popq %r13
diff --git a/test/CodeGen/X86/haddsub-undef.ll b/test/CodeGen/X86/haddsub-undef.ll
index 6d79d4de5206..091d1a22dbcd 100644
--- a/test/CodeGen/X86/haddsub-undef.ll
+++ b/test/CodeGen/X86/haddsub-undef.ll
@@ -171,9 +171,8 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) {
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: addss %xmm2, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test8_undef:
diff --git a/test/CodeGen/X86/hoist-spill.ll b/test/CodeGen/X86/hoist-spill.ll
index afabf96b12a3..03f558fc3ae2 100644
--- a/test/CodeGen/X86/hoist-spill.ll
+++ b/test/CodeGen/X86/hoist-spill.ll
@@ -3,10 +3,8 @@
; Check no spills to the same stack slot after hoisting.
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)
-; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)
-; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp)
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 786534b00d39..56f4161147b4 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -4,16 +4,19 @@
; By starting the IV at -64 instead of 0, a cmp is eliminated,
; as the flags from the add can be used directly.
-; STATIC: movl $-64, [[ECX:%e..]]
+; STATIC: movl $-64, [[EAX:%e..]]
-; STATIC: movl [[EAX:%e..]], _state+76([[ECX]])
-; STATIC: addl $16, [[ECX]]
+; STATIC: movl %{{.+}}, _state+76([[EAX]])
+; STATIC: addl $16, [[EAX]]
; STATIC: jne
-; In PIC mode the symbol can't be folded, so the change-compare-stride
-; trick applies.
+; The same for PIC mode.
-; PIC: cmpl $64
+; PIC: movl $-64, [[EAX:%e..]]
+
+; PIC: movl %{{.+}}, 76(%{{.+}},[[EAX]])
+; PIC: addl $16, [[EAX]]
+; PIC: jne
@state = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
@S = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
diff --git a/test/CodeGen/X86/madd.ll b/test/CodeGen/X86/madd.ll
index af86df510016..7c2bb822c967 100644
--- a/test/CodeGen/X86/madd.ll
+++ b/test/CodeGen/X86/madd.ll
@@ -9,17 +9,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movl %edx, %eax
; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: xorl %ecx, %ecx
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: .p2align 4, 0x90
; SSE2-NEXT: .LBB0_1: # %vector.body
; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT: movdqu (%rdi), %xmm2
-; SSE2-NEXT: movdqu (%rsi), %xmm3
+; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm2
+; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm3
; SSE2-NEXT: pmaddwd %xmm2, %xmm3
; SSE2-NEXT: paddd %xmm3, %xmm1
-; SSE2-NEXT: addq $16, %rsi
-; SSE2-NEXT: addq $16, %rdi
-; SSE2-NEXT: addq $-8, %rax
+; SSE2-NEXT: addq $8, %rcx
+; SSE2-NEXT: cmpq %rcx, %rax
; SSE2-NEXT: jne .LBB0_1
; SSE2-NEXT: # BB#2: # %middle.block
; SSE2-NEXT: paddd %xmm0, %xmm1
@@ -34,17 +34,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
; AVX2: # BB#0: # %entry
; AVX2-NEXT: movl %edx, %eax
; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: xorl %ecx, %ecx
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: .p2align 4, 0x90
; AVX2-NEXT: .LBB0_1: # %vector.body
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX2-NEXT: vmovdqu (%rsi), %xmm2
-; AVX2-NEXT: vpmaddwd (%rdi), %xmm2, %xmm2
+; AVX2-NEXT: vmovdqu (%rsi,%rcx,2), %xmm2
+; AVX2-NEXT: vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: addq $16, %rsi
-; AVX2-NEXT: addq $16, %rdi
-; AVX2-NEXT: addq $-8, %rax
+; AVX2-NEXT: addq $8, %rcx
+; AVX2-NEXT: cmpq %rcx, %rax
; AVX2-NEXT: jne .LBB0_1
; AVX2-NEXT: # BB#2: # %middle.block
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -60,17 +60,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movl %edx, %eax
; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: xorl %ecx, %ecx
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: .p2align 4, 0x90
; AVX512-NEXT: .LBB0_1: # %vector.body
; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX512-NEXT: vmovdqu (%rsi), %xmm2
-; AVX512-NEXT: vpmaddwd (%rdi), %xmm2, %xmm2
+; AVX512-NEXT: vmovdqu (%rsi,%rcx,2), %xmm2
+; AVX512-NEXT: vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0
-; AVX512-NEXT: addq $16, %rsi
-; AVX512-NEXT: addq $16, %rdi
-; AVX512-NEXT: addq $-8, %rax
+; AVX512-NEXT: addq $8, %rcx
+; AVX512-NEXT: cmpq %rcx, %rax
; AVX512-NEXT: jne .LBB0_1
; AVX512-NEXT: # BB#2: # %middle.block
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -118,12 +118,13 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movl %edx, %eax
; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: xorl %ecx, %ecx
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: .p2align 4, 0x90
; SSE2-NEXT: .LBB1_1: # %vector.body
; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT: movdqu (%rdi), %xmm2
-; SSE2-NEXT: movdqu (%rsi), %xmm3
+; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm2
+; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: pmulhuw %xmm2, %xmm4
; SSE2-NEXT: pmullw %xmm2, %xmm3
@@ -132,9 +133,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
; SSE2-NEXT: paddd %xmm2, %xmm0
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT: paddd %xmm3, %xmm1
-; SSE2-NEXT: addq $16, %rsi
-; SSE2-NEXT: addq $16, %rdi
-; SSE2-NEXT: addq $-8, %rax
+; SSE2-NEXT: addq $8, %rcx
+; SSE2-NEXT: cmpq %rcx, %rax
; SSE2-NEXT: jne .LBB1_1
; SSE2-NEXT: # BB#2: # %middle.block
; SSE2-NEXT: paddd %xmm1, %xmm0
@@ -149,6 +149,7 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
; AVX2: # BB#0: # %entry
; AVX2-NEXT: movl %edx, %eax
; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: xorl %ecx, %ecx
; AVX2-NEXT: .p2align 4, 0x90
; AVX2-NEXT: .LBB1_1: # %vector.body
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
@@ -156,9 +157,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX2-NEXT: vpmulld %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: addq $16, %rsi
-; AVX2-NEXT: addq $16, %rdi
-; AVX2-NEXT: addq $-8, %rax
+; AVX2-NEXT: addq $8, %rcx
+; AVX2-NEXT: cmpq %rcx, %rax
; AVX2-NEXT: jne .LBB1_1
; AVX2-NEXT: # BB#2: # %middle.block
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -174,6 +174,7 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movl %edx, %eax
; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX512-NEXT: xorl %ecx, %ecx
; AVX512-NEXT: .p2align 4, 0x90
; AVX512-NEXT: .LBB1_1: # %vector.body
; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
@@ -181,9 +182,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX512-NEXT: vpmulld %ymm1, %ymm2, %ymm1
; AVX512-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: addq $16, %rsi
-; AVX512-NEXT: addq $16, %rdi
-; AVX512-NEXT: addq $-8, %rax
+; AVX512-NEXT: addq $8, %rcx
+; AVX512-NEXT: cmpq %rcx, %rax
; AVX512-NEXT: jne .LBB1_1
; AVX512-NEXT: # BB#2: # %middle.block
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -231,6 +231,7 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movl %edx, %eax
; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: xorl %ecx, %ecx
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: pxor %xmm2, %xmm2
@@ -263,9 +264,8 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
; SSE2-NEXT: psrad $16, %xmm4
; SSE2-NEXT: paddd %xmm4, %xmm2
-; SSE2-NEXT: addq $16, %rsi
-; SSE2-NEXT: addq $16, %rdi
-; SSE2-NEXT: addq $-16, %rax
+; SSE2-NEXT: addq $16, %rcx
+; SSE2-NEXT: cmpq %rcx, %rax
; SSE2-NEXT: jne .LBB2_1
; SSE2-NEXT: # BB#2: # %middle.block
; SSE2-NEXT: paddd %xmm3, %xmm0
@@ -282,17 +282,17 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
; AVX2: # BB#0: # %entry
; AVX2-NEXT: movl %edx, %eax
; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
+; AVX2-NEXT: xorl %ecx, %ecx
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: .p2align 4, 0x90
; AVX2-NEXT: .LBB2_1: # %vector.body
; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX2-NEXT: vpmovsxbw (%rdi), %ymm2
-; AVX2-NEXT: vpmovsxbw (%rsi), %ymm3
+; AVX2-NEXT: vpmovsxbw (%rdi,%rcx), %ymm2
+; AVX2-NEXT: vpmovsxbw (%rsi,%rcx), %ymm3
; AVX2-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: addq $16, %rsi
-; AVX2-NEXT: addq $16, %rdi
-; AVX2-NEXT: addq $-16, %rax
+; AVX2-NEXT: addq $16, %rcx
+; AVX2-NEXT: cmpq %rcx, %rax
; AVX2-NEXT: jne .LBB2_1
; AVX2-NEXT: # BB#2: # %middle.block
; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
@@ -309,18 +309,18 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
; AVX512: # BB#0: # %entry
; AVX512-NEXT: movl %edx, %eax
; AVX512-NEXT: vpxord %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: xorl %ecx, %ecx
; AVX512-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX512-NEXT: .p2align 4, 0x90
; AVX512-NEXT: .LBB2_1: # %vector.body
; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX512-NEXT: vpmovsxbw (%rdi), %ymm2
-; AVX512-NEXT: vpmovsxbw (%rsi), %ymm3
+; AVX512-NEXT: vpmovsxbw (%rdi,%rcx), %ymm2
+; AVX512-NEXT: vpmovsxbw (%rsi,%rcx), %ymm3
; AVX512-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm2
; AVX512-NEXT: vpaddd %zmm0, %zmm2, %zmm0
-; AVX512-NEXT: addq $16, %rsi
-; AVX512-NEXT: addq $16, %rdi
-; AVX512-NEXT: addq $-16, %rax
+; AVX512-NEXT: addq $16, %rcx
+; AVX512-NEXT: cmpq %rcx, %rax
; AVX512-NEXT: jne .LBB2_1
; AVX512-NEXT: # BB#2: # %middle.block
; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 8c0a4d4f1752..61aa05a5270b 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -5,7 +5,7 @@
; CHECK-LABEL: count_up
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: incq
+; CHECK: addq $8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_up(double* %d, i64 %n) nounwind {
@@ -38,7 +38,7 @@ return:
; CHECK-LABEL: count_down
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq
+; CHECK: addq $-8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_down(double* %d, i64 %n) nounwind {
@@ -71,7 +71,7 @@ return:
; CHECK-LABEL: count_up_signed
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: incq
+; CHECK: addq $8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_up_signed(double* %d, i64 %n) nounwind {
@@ -106,7 +106,7 @@ return:
; CHECK-LABEL: count_down_signed
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq
+; CHECK: addq $-8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_down_signed(double* %d, i64 %n) nounwind {
@@ -141,7 +141,7 @@ return:
; CHECK-LABEL: another_count_up
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq
+; CHECK: addq $8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_up(double* %d, i64 %n) nounwind {
@@ -174,7 +174,7 @@ return:
; CHECK-LABEL: another_count_down
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $-8,
+; CHECK: addq $-8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_down(double* %d, i64 %n) nounwind {
@@ -207,7 +207,7 @@ return:
; CHECK-LABEL: another_count_up_signed
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq
+; CHECK: addq $8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_up_signed(double* %d, i64 %n) nounwind {
@@ -242,7 +242,7 @@ return:
; CHECK-LABEL: another_count_down_signed
; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: decq
+; CHECK: addq $-8
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_down_signed(double* %d, i64 %n) nounwind {
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index ce1bb3b06ce5..4e2475b1c67d 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -10,9 +10,28 @@
declare i32 @memcmp(i8*, i8*, i64)
-define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
+define i32 @length2(i8* %X, i8* %Y) nounwind {
; X32-LABEL: length2:
; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $2
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length2:
+; X64: # BB#0:
+; X64-NEXT: movl $2, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
+ ret i32 %m
+}
+
+define i1 @length2_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length2_eq:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movzwl (%ecx), %ecx
@@ -20,7 +39,7 @@ define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; X64-LABEL: length2:
+; X64-LABEL: length2_eq:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpw (%rsi), %ax
@@ -31,8 +50,8 @@ define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length2_const:
+define i1 @length2_eq_const(i8* %X) nounwind {
+; X32-LABEL: length2_eq_const:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl (%eax), %eax
@@ -40,7 +59,7 @@ define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind {
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; X64-LABEL: length2_const:
+; X64-LABEL: length2_eq_const:
; X64: # BB#0:
; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: cmpl $12849, %eax # imm = 0x3231
@@ -51,8 +70,8 @@ define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
-; X32-LABEL: length2_nobuiltin_attr:
+define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length2_eq_nobuiltin_attr:
; X32: # BB#0:
; X32-NEXT: pushl $0
; X32-NEXT: pushl $2
@@ -64,7 +83,7 @@ define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; X64-LABEL: length2_nobuiltin_attr:
+; X64-LABEL: length2_eq_nobuiltin_attr:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $2, %edx
@@ -78,9 +97,74 @@ define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
+define i32 @length3(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length3:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $3
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length3:
+; X64: # BB#0:
+; X64-NEXT: movl $3, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
+ ret i32 %m
+}
+
+define i1 @length3_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length3_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $3
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: setne %al
+; X32-NEXT: retl
+;
+; X64-LABEL: length3_eq:
+; X64: # BB#0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $3, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length4(i8* %X, i8* %Y) nounwind {
; X32-LABEL: length4:
; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length4:
+; X64: # BB#0:
+; X64-NEXT: movl $4, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
+ ret i32 %m
+}
+
+define i1 @length4_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length4_eq:
+; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl (%ecx), %ecx
@@ -88,7 +172,7 @@ define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; X64-LABEL: length4:
+; X64-LABEL: length4_eq:
; X64: # BB#0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: cmpl (%rsi), %eax
@@ -99,15 +183,15 @@ define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length4_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length4_const:
+define i1 @length4_eq_const(i8* %X) nounwind {
+; X32-LABEL: length4_eq_const:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; X64-LABEL: length4_const:
+; X64-LABEL: length4_eq_const:
; X64: # BB#0:
; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231
; X64-NEXT: sete %al
@@ -117,7 +201,53 @@ define i1 @length4_const(i8* %X, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
+define i32 @length5(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length5:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $5
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length5:
+; X64: # BB#0:
+; X64-NEXT: movl $5, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
+ ret i32 %m
+}
+
+define i1 @length5_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length5_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $5
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: setne %al
+; X32-NEXT: retl
+;
+; X64-LABEL: length5_eq:
+; X64: # BB#0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $5, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length8(i8* %X, i8* %Y) nounwind {
; X32-LABEL: length8:
; X32: # BB#0:
; X32-NEXT: pushl $0
@@ -126,11 +256,30 @@ define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
; X32-NEXT: pushl {{[0-9]+}}(%esp)
; X32-NEXT: calll memcmp
; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length8:
+; X64: # BB#0:
+; X64-NEXT: movl $8, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
+ ret i32 %m
+}
+
+define i1 @length8_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length8_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $8
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: testl %eax, %eax
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; X64-LABEL: length8:
+; X64-LABEL: length8_eq:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: cmpq (%rsi), %rax
@@ -141,8 +290,8 @@ define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length8_const:
+define i1 @length8_eq_const(i8* %X) nounwind {
+; X32-LABEL: length8_eq_const:
; X32: # BB#0:
; X32-NEXT: pushl $0
; X32-NEXT: pushl $8
@@ -154,7 +303,7 @@ define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind {
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; X64-LABEL: length8_const:
+; X64-LABEL: length8_eq_const:
; X64: # BB#0:
; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130
; X64-NEXT: cmpq %rax, (%rdi)
@@ -165,7 +314,55 @@ define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length16(i8* %x, i8* %y) nounwind {
+define i1 @length12_eq(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length12_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $12
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: setne %al
+; X32-NEXT: retl
+;
+; X64-LABEL: length12_eq:
+; X64: # BB#0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl $12, %edx
+; X64-NEXT: callq memcmp
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: setne %al
+; X64-NEXT: popq %rcx
+; X64-NEXT: retq
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
+ %c = icmp ne i32 %m, 0
+ ret i1 %c
+}
+
+define i32 @length12(i8* %X, i8* %Y) nounwind {
+; X32-LABEL: length12:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $12
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length12:
+; X64: # BB#0:
+; X64-NEXT: movl $12, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
+ ret i32 %m
+}
+
+; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329
+
+define i32 @length16(i8* %X, i8* %Y) nounwind {
; X32-LABEL: length16:
; X32: # BB#0:
; X32-NEXT: pushl $0
@@ -174,11 +371,30 @@ define i1 @length16(i8* %x, i8* %y) nounwind {
; X32-NEXT: pushl {{[0-9]+}}(%esp)
; X32-NEXT: calll memcmp
; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length16:
+; X64: # BB#0:
+; X64-NEXT: movl $16, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
+ ret i32 %m
+}
+
+define i1 @length16_eq(i8* %x, i8* %y) nounwind {
+; X32-LABEL: length16_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $16
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: testl %eax, %eax
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; SSE2-LABEL: length16:
+; SSE2-LABEL: length16_eq:
; SSE2: # BB#0:
; SSE2-NEXT: movdqu (%rsi), %xmm0
; SSE2-NEXT: movdqu (%rdi), %xmm1
@@ -188,7 +404,7 @@ define i1 @length16(i8* %x, i8* %y) nounwind {
; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
-; AVX2-LABEL: length16:
+; AVX2-LABEL: length16_eq:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqu (%rdi), %xmm0
; AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
@@ -201,8 +417,8 @@ define i1 @length16(i8* %x, i8* %y) nounwind {
ret i1 %cmp
}
-define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length16_const:
+define i1 @length16_eq_const(i8* %X) nounwind {
+; X32-LABEL: length16_eq_const:
; X32: # BB#0:
; X32-NEXT: pushl $0
; X32-NEXT: pushl $16
@@ -214,7 +430,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind {
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; SSE2-LABEL: length16_const:
+; SSE2-LABEL: length16_eq_const:
; SSE2: # BB#0:
; SSE2-NEXT: movdqu (%rdi), %xmm0
; SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
@@ -223,7 +439,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind {
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
-; AVX2-LABEL: length16_const:
+; AVX2-LABEL: length16_eq_const:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqu (%rdi), %xmm0
; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
@@ -236,7 +452,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length32(i8* %x, i8* %y) nounwind {
+define i32 @length32(i8* %X, i8* %Y) nounwind {
; X32-LABEL: length32:
; X32: # BB#0:
; X32-NEXT: pushl $0
@@ -245,11 +461,32 @@ define i1 @length32(i8* %x, i8* %y) nounwind {
; X32-NEXT: pushl {{[0-9]+}}(%esp)
; X32-NEXT: calll memcmp
; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length32:
+; X64: # BB#0:
+; X64-NEXT: movl $32, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind
+ ret i32 %m
+}
+
+; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325
+
+define i1 @length32_eq(i8* %x, i8* %y) nounwind {
+; X32-LABEL: length32_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $32
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: testl %eax, %eax
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; SSE2-LABEL: length32:
+; SSE2-LABEL: length32_eq:
; SSE2: # BB#0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movl $32, %edx
@@ -259,7 +496,7 @@ define i1 @length32(i8* %x, i8* %y) nounwind {
; SSE2-NEXT: popq %rcx
; SSE2-NEXT: retq
;
-; AVX2-LABEL: length32:
+; AVX2-LABEL: length32_eq:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqu (%rdi), %ymm0
; AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
@@ -273,8 +510,8 @@ define i1 @length32(i8* %x, i8* %y) nounwind {
ret i1 %cmp
}
-define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length32_const:
+define i1 @length32_eq_const(i8* %X) nounwind {
+; X32-LABEL: length32_eq_const:
; X32: # BB#0:
; X32-NEXT: pushl $0
; X32-NEXT: pushl $32
@@ -286,7 +523,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind {
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; SSE2-LABEL: length32_const:
+; SSE2-LABEL: length32_eq_const:
; SSE2: # BB#0:
; SSE2-NEXT: pushq %rax
; SSE2-NEXT: movl $.L.str, %esi
@@ -297,7 +534,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind {
; SSE2-NEXT: popq %rcx
; SSE2-NEXT: retq
;
-; AVX2-LABEL: length32_const:
+; AVX2-LABEL: length32_eq_const:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqu (%rdi), %ymm0
; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0
@@ -311,7 +548,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind {
ret i1 %c
}
-define i1 @length64(i8* %x, i8* %y) nounwind {
+define i32 @length64(i8* %X, i8* %Y) nounwind {
; X32-LABEL: length64:
; X32: # BB#0:
; X32-NEXT: pushl $0
@@ -320,11 +557,30 @@ define i1 @length64(i8* %x, i8* %y) nounwind {
; X32-NEXT: pushl {{[0-9]+}}(%esp)
; X32-NEXT: calll memcmp
; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: length64:
+; X64: # BB#0:
+; X64-NEXT: movl $64, %edx
+; X64-NEXT: jmp memcmp # TAILCALL
+ %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind
+ ret i32 %m
+}
+
+define i1 @length64_eq(i8* %x, i8* %y) nounwind {
+; X32-LABEL: length64_eq:
+; X32: # BB#0:
+; X32-NEXT: pushl $0
+; X32-NEXT: pushl $64
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: calll memcmp
+; X32-NEXT: addl $16, %esp
; X32-NEXT: testl %eax, %eax
; X32-NEXT: setne %al
; X32-NEXT: retl
;
-; X64-LABEL: length64:
+; X64-LABEL: length64_eq:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $64, %edx
@@ -338,8 +594,8 @@ define i1 @length64(i8* %x, i8* %y) nounwind {
ret i1 %cmp
}
-define i1 @length64_const(i8* %X, i32* nocapture %P) nounwind {
-; X32-LABEL: length64_const:
+define i1 @length64_eq_const(i8* %X) nounwind {
+; X32-LABEL: length64_eq_const:
; X32: # BB#0:
; X32-NEXT: pushl $0
; X32-NEXT: pushl $64
@@ -351,7 +607,7 @@ define i1 @length64_const(i8* %X, i32* nocapture %P) nounwind {
; X32-NEXT: sete %al
; X32-NEXT: retl
;
-; X64-LABEL: length64_const:
+; X64-LABEL: length64_eq_const:
; X64: # BB#0:
; X64-NEXT: pushq %rax
; X64-NEXT: movl $.L.str, %esi
diff --git a/test/CodeGen/X86/merge-consecutive-loads-128.ll b/test/CodeGen/X86/merge-consecutive-loads-128.ll
index 71417694b0d4..1d5829407b71 100644
--- a/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -269,10 +269,8 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s
; SSE2-LABEL: merge_4f32_f32_012u:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: merge_4f32_f32_012u:
@@ -290,11 +288,11 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s
; X32-SSE1-LABEL: merge_4f32_f32_012u:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE1-NEXT: retl
;
; X32-SSE41-LABEL: merge_4f32_f32_012u:
@@ -320,10 +318,8 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s
; SSE2-LABEL: merge_4f32_f32_019u:
; SSE2: # BB#0:
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: merge_4f32_f32_019u:
@@ -341,11 +337,11 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s
; X32-SSE1-LABEL: merge_4f32_f32_019u:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE1-NEXT: retl
;
; X32-SSE41-LABEL: merge_4f32_f32_019u:
@@ -1037,13 +1033,11 @@ define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinlin
define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable noinline ssp {
; SSE2-LABEL: merge_4f32_f32_2345_volatile:
; SSE2: # BB#0:
-; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: merge_4f32_f32_2345_volatile:
@@ -1065,13 +1059,13 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n
; X32-SSE1-LABEL: merge_4f32_f32_2345_volatile:
; X32-SSE1: # BB#0:
; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE1-NEXT: retl
;
; X32-SSE41-LABEL: merge_4f32_f32_2345_volatile:
diff --git a/test/CodeGen/X86/mul-constant-i16.ll b/test/CodeGen/X86/mul-constant-i16.ll
index e3e2737cf3e6..7b39bfe1c484 100644
--- a/test/CodeGen/X86/mul-constant-i16.ll
+++ b/test/CodeGen/X86/mul-constant-i16.ll
@@ -188,13 +188,16 @@ define i16 @test_mul_by_11(i16 %x) {
; X86-LABEL: test_mul_by_11:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $11, %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_11:
; X64: # BB#0:
-; X64-NEXT: imull $11, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: leal (%rdi,%rax,2), %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 11
@@ -225,13 +228,16 @@ define i16 @test_mul_by_13(i16 %x) {
; X86-LABEL: test_mul_by_13:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $13, %eax, %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_13:
; X64: # BB#0:
-; X64-NEXT: imull $13, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 13
@@ -241,14 +247,19 @@ define i16 @test_mul_by_13(i16 %x) {
define i16 @test_mul_by_14(i16 %x) {
; X86-LABEL: test_mul_by_14:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $14, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_14:
; X64: # BB#0:
-; X64-NEXT: imull $14, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: leal (%rdi,%rax,4), %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 14
@@ -337,14 +348,19 @@ define i16 @test_mul_by_18(i16 %x) {
define i16 @test_mul_by_19(i16 %x) {
; X86-LABEL: test_mul_by_19:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $19, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_19:
; X64: # BB#0:
-; X64-NEXT: imull $19, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: shll $2, %eax
+; X64-NEXT: subl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 19
@@ -375,13 +391,16 @@ define i16 @test_mul_by_21(i16 %x) {
; X86-LABEL: test_mul_by_21:
; X86: # BB#0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $21, %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_21:
; X64: # BB#0:
-; X64-NEXT: imull $21, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 21
@@ -391,14 +410,19 @@ define i16 @test_mul_by_21(i16 %x) {
define i16 @test_mul_by_22(i16 %x) {
; X86-LABEL: test_mul_by_22:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $22, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_22:
; X64: # BB#0:
-; X64-NEXT: imull $22, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: leal (%rdi,%rax,4), %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 22
@@ -408,14 +432,19 @@ define i16 @test_mul_by_22(i16 %x) {
define i16 @test_mul_by_23(i16 %x) {
; X86-LABEL: test_mul_by_23:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $23, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %eax
+; X86-NEXT: shll $3, %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_23:
; X64: # BB#0:
-; X64-NEXT: imull $23, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: shll $3, %eax
+; X64-NEXT: subl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 23
@@ -465,14 +494,19 @@ define i16 @test_mul_by_25(i16 %x) {
define i16 @test_mul_by_26(i16 %x) {
; X86-LABEL: test_mul_by_26:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $26, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_26:
; X64: # BB#0:
-; X64-NEXT: imull $26, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: leal (%rax,%rax,2), %eax
+; X64-NEXT: subl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 26
@@ -502,14 +536,19 @@ define i16 @test_mul_by_27(i16 %x) {
define i16 @test_mul_by_28(i16 %x) {
; X86-LABEL: test_mul_by_28:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $28, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_28:
; X64: # BB#0:
-; X64-NEXT: imull $28, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: leal (%rax,%rax,2), %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 28
@@ -519,14 +558,21 @@ define i16 @test_mul_by_28(i16 %x) {
define i16 @test_mul_by_29(i16 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $29, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_29:
; X64: # BB#0:
-; X64-NEXT: imull $29, %edi, %eax
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: leal (%rax,%rax,2), %eax
+; X64-NEXT: addl %edi, %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 29
@@ -536,14 +582,20 @@ define i16 @test_mul_by_29(i16 %x) {
define i16 @test_mul_by_30(i16 %x) {
; X86-LABEL: test_mul_by_30:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: imull $30, %eax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shll $5, %eax
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
; X64-LABEL: test_mul_by_30:
; X64: # BB#0:
-; X64-NEXT: imull $30, %edi, %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $5, %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: subl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 30
@@ -587,3 +639,30 @@ define i16 @test_mul_by_32(i16 %x) {
%mul = mul nsw i16 %x, 32
ret i16 %mul
}
+
+; (x*9+42)*(x*5+2)
+define i16 @test_mul_spec(i16 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86: # BB#0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal 42(%eax,%eax,8), %ecx
+; X86-NEXT: leal 2(%eax,%eax,4), %eax
+; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mul_spec:
+; X64: # BB#0:
+; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-NEXT: leal 42(%rdi,%rdi,8), %ecx
+; X64-NEXT: leal 2(%rdi,%rdi,4), %eax
+; X64-NEXT: imull %ecx, %eax
+; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; X64-NEXT: retq
+ %mul = mul nsw i16 %x, 9
+ %add = add nsw i16 %mul, 42
+ %mul2 = mul nsw i16 %x, 5
+ %add2 = add nsw i16 %mul2, 2
+ %mul3 = mul nsw i16 %add, %add2
+ ret i16 %mul3
+}
diff --git a/test/CodeGen/X86/mul-constant-i32.ll b/test/CodeGen/X86/mul-constant-i32.ll
index 76e46e1f1b09..d545b477e102 100644
--- a/test/CodeGen/X86/mul-constant-i32.ll
+++ b/test/CodeGen/X86/mul-constant-i32.ll
@@ -1,6 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG
+; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT
define i32 @test_mul_by_1(i32 %x) {
; X86-LABEL: test_mul_by_1:
@@ -8,10 +14,40 @@ define i32 @test_mul_by_1(i32 %x) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_1:
-; X64: # BB#0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_1:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_1:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_1:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_1:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_1:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_1:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_1:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 1
ret i32 %mul
}
@@ -23,11 +59,47 @@ define i32 @test_mul_by_2(i32 %x) {
; X86-NEXT: addl %eax, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_2:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_2:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_2:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_2:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: addl %eax, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_2:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_2:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_2:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_2:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 2
ret i32 %mul
}
@@ -38,11 +110,46 @@ define i32 @test_mul_by_3(i32 %x) {
; X86-NEXT: imull $3, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_3:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_3:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_3:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_3:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_3:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_3:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_3:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_3:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 3
ret i32 %mul
}
@@ -54,11 +161,47 @@ define i32 @test_mul_by_4(i32 %x) {
; X86-NEXT: shll $2, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_4:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (,%rdi,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_4:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_4:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_4:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: shll $2, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_4:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_4:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_4:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_4:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 4
ret i32 %mul
}
@@ -69,11 +212,46 @@ define i32 @test_mul_by_5(i32 %x) {
; X86-NEXT: imull $5, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_5:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_5:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_5:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_5:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_5:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_5:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_5:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_5:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 5
ret i32 %mul
}
@@ -86,12 +264,46 @@ define i32 @test_mul_by_6(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_6:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: addl %edi, %edi
-; X64-NEXT: leal (%rdi,%rdi,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_6:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_6:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_6:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $6, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_6:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_6:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_6:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_6:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 6
ret i32 %mul
}
@@ -104,12 +316,46 @@ define i32 @test_mul_by_7(i32 %x) {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_7:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (,%rdi,8), %eax
-; X64-NEXT: subl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_7:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_7:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_7:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_7:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_7:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_7:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_7:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 7
ret i32 %mul
}
@@ -121,11 +367,47 @@ define i32 @test_mul_by_8(i32 %x) {
; X86-NEXT: shll $3, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_8:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (,%rdi,8), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_8:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_8:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_8:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: shll $3, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_8:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_8:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_8:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_8:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 8
ret i32 %mul
}
@@ -136,11 +418,46 @@ define i32 @test_mul_by_9(i32 %x) {
; X86-NEXT: imull $9, {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_9:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,8), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_9:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_9:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_9:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_9:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_9:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_9:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_9:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 9
ret i32 %mul
}
@@ -153,12 +470,46 @@ define i32 @test_mul_by_10(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_10:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: addl %edi, %edi
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_10:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_10:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_10:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_10:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_10:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_10:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_10:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 10
ret i32 %mul
}
@@ -166,13 +517,49 @@ define i32 @test_mul_by_10(i32 %x) {
define i32 @test_mul_by_11(i32 %x) {
; X86-LABEL: test_mul_by_11:
; X86: # BB#0:
-; X86-NEXT: imull $11, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_11:
-; X64: # BB#0:
-; X64-NEXT: imull $11, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_11:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_11:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_11:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_11:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_11:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_11:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_11:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 11
ret i32 %mul
}
@@ -185,12 +572,46 @@ define i32 @test_mul_by_12(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_12:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: shll $2, %edi
-; X64-NEXT: leal (%rdi,%rdi,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_12:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_12:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_12:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_12:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_12:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_12:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_12:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 12
ret i32 %mul
}
@@ -198,13 +619,49 @@ define i32 @test_mul_by_12(i32 %x) {
define i32 @test_mul_by_13(i32 %x) {
; X86-LABEL: test_mul_by_13:
; X86: # BB#0:
-; X86-NEXT: imull $13, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_13:
-; X64: # BB#0:
-; X64-NEXT: imull $13, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_13:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_13:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_13:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_13:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_13:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_13:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_13:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 13
ret i32 %mul
}
@@ -212,13 +669,52 @@ define i32 @test_mul_by_13(i32 %x) {
define i32 @test_mul_by_14(i32 %x) {
; X86-LABEL: test_mul_by_14:
; X86: # BB#0:
-; X86-NEXT: imull $14, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_14:
-; X64: # BB#0:
-; X64-NEXT: imull $14, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_14:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_14:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_14:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_14:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_14:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_14:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_14:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 14
ret i32 %mul
}
@@ -231,12 +727,46 @@ define i32 @test_mul_by_15(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_15:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: leal (%rax,%rax,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_15:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_15:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_15:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_15:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_15:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_15:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_15:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 15
ret i32 %mul
}
@@ -248,11 +778,47 @@ define i32 @test_mul_by_16(i32 %x) {
; X86-NEXT: shll $4, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_16:
-; X64: # BB#0:
-; X64-NEXT: shll $4, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_16:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_16:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shll $4, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_16:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: shll $4, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_16:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_16:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_16:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shll $4, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_16:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 16
ret i32 %mul
}
@@ -266,13 +832,49 @@ define i32 @test_mul_by_17(i32 %x) {
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_17:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shll $4, %eax
-; X64-NEXT: leal (%rax,%rdi), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_17:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_17:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_17:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_17:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_17:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_17:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rax,%rdi), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_17:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 17
ret i32 %mul
}
@@ -285,12 +887,46 @@ define i32 @test_mul_by_18(i32 %x) {
; X86-NEXT: leal (%eax,%eax,8), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_18:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: addl %edi, %edi
-; X64-NEXT: leal (%rdi,%rdi,8), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_18:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_18:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_18:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_18:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_18:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_18:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
+; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_18:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 18
ret i32 %mul
}
@@ -298,13 +934,52 @@ define i32 @test_mul_by_18(i32 %x) {
define i32 @test_mul_by_19(i32 %x) {
; X86-LABEL: test_mul_by_19:
; X86: # BB#0:
-; X86-NEXT: imull $19, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_19:
-; X64: # BB#0:
-; X64-NEXT: imull $19, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_19:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: shll $2, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_19:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: shll $2, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_19:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_19:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_19:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_19:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_19:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 19
ret i32 %mul
}
@@ -317,12 +992,46 @@ define i32 @test_mul_by_20(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_20:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: shll $2, %edi
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_20:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_20:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_20:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_20:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_20:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_20:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_20:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 20
ret i32 %mul
}
@@ -330,13 +1039,49 @@ define i32 @test_mul_by_20(i32 %x) {
define i32 @test_mul_by_21(i32 %x) {
; X86-LABEL: test_mul_by_21:
; X86: # BB#0:
-; X86-NEXT: imull $21, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_21:
-; X64: # BB#0:
-; X64-NEXT: imull $21, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_21:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_21:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_21:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_21:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_21:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_21:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_21:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 21
ret i32 %mul
}
@@ -344,13 +1089,52 @@ define i32 @test_mul_by_21(i32 %x) {
define i32 @test_mul_by_22(i32 %x) {
; X86-LABEL: test_mul_by_22:
; X86: # BB#0:
-; X86-NEXT: imull $22, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,4), %eax
+; X86-NEXT: leal (%ecx,%eax,4), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_22:
-; X64: # BB#0:
-; X64-NEXT: imull $22, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_22:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_22:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_22:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_22:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_22:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_22:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_22:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 22
ret i32 %mul
}
@@ -358,13 +1142,52 @@ define i32 @test_mul_by_22(i32 %x) {
define i32 @test_mul_by_23(i32 %x) {
; X86-LABEL: test_mul_by_23:
; X86: # BB#0:
-; X86-NEXT: imull $23, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %eax
+; X86-NEXT: shll $3, %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_23:
-; X64: # BB#0:
-; X64-NEXT: imull $23, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_23:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_23:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: shll $3, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_23:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_23:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_23:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_23:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_23:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 23
ret i32 %mul
}
@@ -377,12 +1200,46 @@ define i32 @test_mul_by_24(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_24:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: shll $3, %edi
-; X64-NEXT: leal (%rdi,%rdi,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_24:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_24:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: shll $3, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_24:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_24:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_24:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_24:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: shll $3, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_24:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 24
ret i32 %mul
}
@@ -395,12 +1252,46 @@ define i32 @test_mul_by_25(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_25:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,4), %eax
-; X64-NEXT: leal (%rax,%rax,4), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_25:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_25:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_25:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_25:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_25:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_25:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rax,%rax,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_25:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 25
ret i32 %mul
}
@@ -408,13 +1299,52 @@ define i32 @test_mul_by_25(i32 %x) {
define i32 @test_mul_by_26(i32 %x) {
; X86-LABEL: test_mul_by_26:
; X86: # BB#0:
-; X86-NEXT: imull $26, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_26:
-; X64: # BB#0:
-; X64-NEXT: imull $26, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_26:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_26:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_26:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_26:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_26:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_26:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_26:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 26
ret i32 %mul
}
@@ -427,12 +1357,46 @@ define i32 @test_mul_by_27(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_27:
-; X64: # BB#0:
-; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; X64-NEXT: leal (%rdi,%rdi,8), %eax
-; X64-NEXT: leal (%rax,%rax,2), %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_27:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_27:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_27:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_27:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_27:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_27:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_27:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 27
ret i32 %mul
}
@@ -440,13 +1404,52 @@ define i32 @test_mul_by_27(i32 %x) {
define i32 @test_mul_by_28(i32 %x) {
; X86-LABEL: test_mul_by_28:
; X86: # BB#0:
-; X86-NEXT: imull $28, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_28:
-; X64: # BB#0:
-; X64-NEXT: imull $28, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_28:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_28:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_28:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_28:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_28:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_28:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_28:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 28
ret i32 %mul
}
@@ -454,13 +1457,55 @@ define i32 @test_mul_by_28(i32 %x) {
define i32 @test_mul_by_29(i32 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # BB#0:
-; X86-NEXT: imull $29, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_29:
-; X64: # BB#0:
-; X64-NEXT: imull $29, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_29:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_29:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_29:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_29:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_29:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_29:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_29:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 29
ret i32 %mul
}
@@ -468,13 +1513,53 @@ define i32 @test_mul_by_29(i32 %x) {
define i32 @test_mul_by_30(i32 %x) {
; X86-LABEL: test_mul_by_30:
; X86: # BB#0:
-; X86-NEXT: imull $30, {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shll $5, %eax
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_30:
-; X64: # BB#0:
-; X64-NEXT: imull $30, %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_30:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_30:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_30:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_30:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_30:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_30:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_30:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 30
ret i32 %mul
}
@@ -488,12 +1573,46 @@ define i32 @test_mul_by_31(i32 %x) {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_31:
-; X64: # BB#0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shll $5, %eax
-; X64-NEXT: subl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_31:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
+; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_31:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_31:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_31:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_31:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_31:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00]
+; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_31:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 31
ret i32 %mul
}
@@ -505,11 +1624,124 @@ define i32 @test_mul_by_32(i32 %x) {
; X86-NEXT: shll $5, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_32:
-; X64: # BB#0:
-; X64-NEXT: shll $5, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_32:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50]
+; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_32:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shll $5, %edi # sched: [1:0.50]
+; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_32:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: shll $5, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_32:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_32:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_32:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shll $5, %edi # sched: [1:1.00]
+; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_32:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i32 %x, 32
ret i32 %mul
}
+
+; (x*9+42)*(x*5+2)
+define i32 @test_mul_spec(i32 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal 42(%eax,%eax,8), %ecx
+; X86-NEXT: leal 2(%eax,%eax,4), %eax
+; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_spec:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: imull %ecx, %eax # sched: [4:1.00]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_spec:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-JAG-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; X64-JAG-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_spec:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: leal 42(%eax,%eax,8), %ecx
+; X86-NOOPT-NEXT: leal 2(%eax,%eax,4), %eax
+; X86-NOOPT-NEXT: imull %ecx, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_spec:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25]
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [4:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_spec:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; JAG-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50]
+; JAG-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_spec:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-SLM-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00]
+; X64-SLM-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00]
+; X64-SLM-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_spec:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; SLM-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00]
+; SLM-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i32 %x, 9
+ %add = add nsw i32 %mul, 42
+ %mul2 = mul nsw i32 %x, 5
+ %add2 = add nsw i32 %mul2, 2
+ %mul3 = mul nsw i32 %add, %add2
+ ret i32 %mul3
+}
diff --git a/test/CodeGen/X86/mul-constant-i64.ll b/test/CodeGen/X86/mul-constant-i64.ll
index 8579179a8231..ea841c761c7b 100644
--- a/test/CodeGen/X86/mul-constant-i64.ll
+++ b/test/CodeGen/X86/mul-constant-i64.ll
@@ -1,18 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG
+; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT
-define i64 @test_mul_by_1(i64 %x) {
+define i64 @test_mul_by_1(i64 %x) nounwind {
; X86-LABEL: test_mul_by_1:
; X86: # BB#0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_1:
-; X64: # BB#0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_1:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_1:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_1:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_1:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_1:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_1:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_1:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 1
ret i64 %mul
}
@@ -26,10 +63,43 @@ define i64 @test_mul_by_2(i64 %x) {
; X86-NEXT: addl %eax, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_2:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_2:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_2:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_2:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $1, %eax, %edx
+; X86-NOOPT-NEXT: addl %eax, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_2:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_2:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_2:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_2:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 2
ret i64 %mul
}
@@ -43,10 +113,43 @@ define i64 @test_mul_by_3(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_3:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_3:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_3:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_3:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $3, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_3:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_3:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_3:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_3:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 3
ret i64 %mul
}
@@ -60,10 +163,43 @@ define i64 @test_mul_by_4(i64 %x) {
; X86-NEXT: shll $2, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_4:
-; X64: # BB#0:
-; X64-NEXT: leaq (,%rdi,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_4:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_4:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_4:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $2, %eax, %edx
+; X86-NOOPT-NEXT: shll $2, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_4:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_4:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_4:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_4:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 4
ret i64 %mul
}
@@ -77,10 +213,43 @@ define i64 @test_mul_by_5(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_5:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_5:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_5:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_5:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $5, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_5:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_5:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_5:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_5:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 5
ret i64 %mul
}
@@ -95,11 +264,46 @@ define i64 @test_mul_by_6(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,2), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_6:
-; X64: # BB#0:
-; X64-NEXT: addq %rdi, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_6:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_6:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_6:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $6, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $6, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_6:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_6:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_6:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_6:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 6
ret i64 %mul
}
@@ -115,11 +319,46 @@ define i64 @test_mul_by_7(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_7:
-; X64: # BB#0:
-; X64-NEXT: leaq (,%rdi,8), %rax
-; X64-NEXT: subq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_7:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_7:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_7:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $7, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_7:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_7:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_7:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_7:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 7
ret i64 %mul
}
@@ -133,10 +372,43 @@ define i64 @test_mul_by_8(i64 %x) {
; X86-NEXT: shll $3, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_8:
-; X64: # BB#0:
-; X64-NEXT: leaq (,%rdi,8), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_8:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_8:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_8:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $3, %eax, %edx
+; X86-NOOPT-NEXT: shll $3, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_8:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_8:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_8:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_8:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 8
ret i64 %mul
}
@@ -150,10 +422,43 @@ define i64 @test_mul_by_9(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_9:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,8), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_9:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_9:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_9:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $9, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_9:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_9:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_9:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_9:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 9
ret i64 %mul
}
@@ -168,11 +473,46 @@ define i64 @test_mul_by_10(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,2), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_10:
-; X64: # BB#0:
-; X64-NEXT: addq %rdi, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_10:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_10:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_10:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $10, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_10:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_10:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_10:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_10:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 10
ret i64 %mul
}
@@ -180,16 +520,53 @@ define i64 @test_mul_by_10(i64 %x) {
define i64 @test_mul_by_11(i64 %x) {
; X86-LABEL: test_mul_by_11:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,2), %ecx
; X86-NEXT: movl $11, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $11, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_11:
-; X64: # BB#0:
-; X64-NEXT: imulq $11, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_11:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_11:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_11:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $11, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_11:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_11:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_11:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_11:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 11
ret i64 %mul
}
@@ -204,11 +581,46 @@ define i64 @test_mul_by_12(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,4), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_12:
-; X64: # BB#0:
-; X64-NEXT: shlq $2, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_12:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_12:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_12:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $12, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_12:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_12:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_12:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_12:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 12
ret i64 %mul
}
@@ -216,16 +628,53 @@ define i64 @test_mul_by_12(i64 %x) {
define i64 @test_mul_by_13(i64 %x) {
; X86-LABEL: test_mul_by_13:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
; X86-NEXT: movl $13, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $13, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_13:
-; X64: # BB#0:
-; X64-NEXT: imulq $13, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_13:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_13:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_13:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $13, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_13:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_13:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_13:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_13:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 13
ret i64 %mul
}
@@ -233,16 +682,56 @@ define i64 @test_mul_by_13(i64 %x) {
define i64 @test_mul_by_14(i64 %x) {
; X86-LABEL: test_mul_by_14:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $14, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $14, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_14:
-; X64: # BB#0:
-; X64-NEXT: imulq $14, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_14:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_14:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_14:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $14, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_14:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_14:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_14:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_14:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 14
ret i64 %mul
}
@@ -258,11 +747,46 @@ define i64 @test_mul_by_15(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_15:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: leaq (%rax,%rax,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_15:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_15:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_15:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $15, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_15:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_15:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_15:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_15:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 15
ret i64 %mul
}
@@ -276,11 +800,49 @@ define i64 @test_mul_by_16(i64 %x) {
; X86-NEXT: shll $4, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_16:
-; X64: # BB#0:
-; X64-NEXT: shlq $4, %rdi
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_16:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_16:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $4, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_16:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $4, %eax, %edx
+; X86-NOOPT-NEXT: shll $4, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_16:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_16:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_16:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $4, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_16:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shlq $4, %rdi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 16
ret i64 %mul
}
@@ -297,12 +859,49 @@ define i64 @test_mul_by_17(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_17:
-; X64: # BB#0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlq $4, %rax
-; X64-NEXT: leaq (%rax,%rdi), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_17:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_17:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_17:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $17, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_17:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_17:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_17:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00]
+; X64-SLM-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_17:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 17
ret i64 %mul
}
@@ -317,11 +916,46 @@ define i64 @test_mul_by_18(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,2), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_18:
-; X64: # BB#0:
-; X64-NEXT: addq %rdi, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,8), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_18:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_18:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_18:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $18, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_18:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_18:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_18:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_18:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 18
ret i64 %mul
}
@@ -329,16 +963,56 @@ define i64 @test_mul_by_18(i64 %x) {
define i64 @test_mul_by_19(i64 %x) {
; X86-LABEL: test_mul_by_19:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: shll $2, %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl $19, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $19, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_19:
-; X64: # BB#0:
-; X64-NEXT: imulq $19, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_19:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_19:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: shlq $2, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_19:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $19, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_19:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_19:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_19:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_19:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 19
ret i64 %mul
}
@@ -353,11 +1027,46 @@ define i64 @test_mul_by_20(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,4), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_20:
-; X64: # BB#0:
-; X64-NEXT: shlq $2, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_20:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_20:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_20:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $20, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_20:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_20:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_20:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_20:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 20
ret i64 %mul
}
@@ -365,16 +1074,53 @@ define i64 @test_mul_by_20(i64 %x) {
define i64 @test_mul_by_21(i64 %x) {
; X86-LABEL: test_mul_by_21:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
; X86-NEXT: movl $21, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $21, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_21:
-; X64: # BB#0:
-; X64-NEXT: imulq $21, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_21:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_21:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_21:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $21, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_21:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_21:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_21:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_21:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 21
ret i64 %mul
}
@@ -382,16 +1128,56 @@ define i64 @test_mul_by_21(i64 %x) {
define i64 @test_mul_by_22(i64 %x) {
; X86-LABEL: test_mul_by_22:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $22, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $22, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_22:
-; X64: # BB#0:
-; X64-NEXT: imulq $22, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_22:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_22:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_22:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $22, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_22:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_22:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_22:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_22:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 22
ret i64 %mul
}
@@ -399,16 +1185,56 @@ define i64 @test_mul_by_22(i64 %x) {
define i64 @test_mul_by_23(i64 %x) {
; X86-LABEL: test_mul_by_23:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: shll $3, %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl $23, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $23, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_23:
-; X64: # BB#0:
-; X64-NEXT: imulq $23, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_23:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_23:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: shlq $3, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_23:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $23, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_23:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_23:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_23:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_23:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 23
ret i64 %mul
}
@@ -423,11 +1249,46 @@ define i64 @test_mul_by_24(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,8), %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_24:
-; X64: # BB#0:
-; X64-NEXT: shlq $3, %rdi
-; X64-NEXT: leaq (%rdi,%rdi,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_24:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_24:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $3, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_24:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $24, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_24:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_24:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_24:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $3, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_24:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 24
ret i64 %mul
}
@@ -443,11 +1304,46 @@ define i64 @test_mul_by_25(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_25:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,4), %rax
-; X64-NEXT: leaq (%rax,%rax,4), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_25:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_25:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_25:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $25, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_25:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_25:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_25:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_25:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 25
ret i64 %mul
}
@@ -455,16 +1351,56 @@ define i64 @test_mul_by_25(i64 %x) {
define i64 @test_mul_by_26(i64 %x) {
; X86-LABEL: test_mul_by_26:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl $26, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $26, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_26:
-; X64: # BB#0:
-; X64-NEXT: imulq $26, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_26:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_26:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_26:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $26, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_26:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_26:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_26:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_26:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 26
ret i64 %mul
}
@@ -480,11 +1416,46 @@ define i64 @test_mul_by_27(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_27:
-; X64: # BB#0:
-; X64-NEXT: leaq (%rdi,%rdi,8), %rax
-; X64-NEXT: leaq (%rax,%rax,2), %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_27:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_27:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_27:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $27, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_27:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_27:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_27:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_27:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 27
ret i64 %mul
}
@@ -492,16 +1463,56 @@ define i64 @test_mul_by_27(i64 %x) {
define i64 @test_mul_by_28(i64 %x) {
; X86-LABEL: test_mul_by_28:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $28, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $28, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_28:
-; X64: # BB#0:
-; X64-NEXT: imulq $28, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_28:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_28:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_28:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $28, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_28:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_28:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_28:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_28:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 28
ret i64 %mul
}
@@ -509,16 +1520,59 @@ define i64 @test_mul_by_28(i64 %x) {
define i64 @test_mul_by_29(i64 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $29, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $29, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_29:
-; X64: # BB#0:
-; X64-NEXT: imulq $29, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_29:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_29:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_29:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $29, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_29:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_29:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_29:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_29:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 29
ret i64 %mul
}
@@ -526,16 +1580,59 @@ define i64 @test_mul_by_29(i64 %x) {
define i64 @test_mul_by_30(i64 %x) {
; X86-LABEL: test_mul_by_30:
; X86: # BB#0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: subl %eax, %ecx
; X86-NEXT: movl $30, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: imull $30, {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_30:
-; X64: # BB#0:
-; X64-NEXT: imulq $30, %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_30:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_30:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_30:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $30, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_30:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_30:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_30:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_30:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 30
ret i64 %mul
}
@@ -552,12 +1649,49 @@ define i64 @test_mul_by_31(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_31:
-; X64: # BB#0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlq $5, %rax
-; X64-NEXT: subq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_31:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_31:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_31:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl $31, %eax
+; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp)
+; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_31:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_31:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_31:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00]
+; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_31:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 31
ret i64 %mul
}
@@ -571,11 +1705,168 @@ define i64 @test_mul_by_32(i64 %x) {
; X86-NEXT: shll $5, %eax
; X86-NEXT: retl
;
-; X64-LABEL: test_mul_by_32:
-; X64: # BB#0:
-; X64-NEXT: shlq $5, %rdi
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: retq
+; X64-HSW-LABEL: test_mul_by_32:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_by_32:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_by_32:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOOPT-NEXT: shldl $5, %eax, %edx
+; X86-NOOPT-NEXT: shll $5, %eax
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_by_32:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_by_32:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50]
+; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_by_32:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: shlq $5, %rdi # sched: [1:1.00]
+; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_by_32:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: shlq $5, %rdi # sched: [1:1.00]
+; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
%mul = mul nsw i64 %x, 32
ret i64 %mul
}
+
+; (x*9+42)*(x*5+2)
+define i64 @test_mul_spec(i64 %x) nounwind {
+; X86-LABEL: test_mul_spec:
+; X86: # BB#0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl $9, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: leal (%edi,%edi,8), %ebx
+; X86-NEXT: addl $42, %esi
+; X86-NEXT: adcl %edx, %ebx
+; X86-NEXT: movl $5, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: leal (%edi,%edi,4), %edi
+; X86-NEXT: addl $2, %ecx
+; X86-NEXT: adcl %edx, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: mull %ecx
+; X86-NEXT: imull %esi, %edi
+; X86-NEXT: addl %edi, %edx
+; X86-NEXT: imull %ebx, %ecx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: test_mul_spec:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; X64-HSW-NEXT: addq $42, %rcx # sched: [1:0.25]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-HSW-NEXT: retq # sched: [1:1.00]
+;
+; X64-JAG-LABEL: test_mul_spec:
+; X64-JAG: # BB#0:
+; X64-JAG-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; X64-JAG-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-JAG-NEXT: retq # sched: [4:1.00]
+;
+; X86-NOOPT-LABEL: test_mul_spec:
+; X86-NOOPT: # BB#0:
+; X86-NOOPT-NEXT: pushl %ebx
+; X86-NOOPT-NEXT: pushl %edi
+; X86-NOOPT-NEXT: pushl %esi
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NOOPT-NEXT: movl $9, %edx
+; X86-NOOPT-NEXT: movl %ecx, %eax
+; X86-NOOPT-NEXT: mull %edx
+; X86-NOOPT-NEXT: movl %eax, %esi
+; X86-NOOPT-NEXT: leal (%edi,%edi,8), %ebx
+; X86-NOOPT-NEXT: addl $42, %esi
+; X86-NOOPT-NEXT: adcl %edx, %ebx
+; X86-NOOPT-NEXT: movl $5, %edx
+; X86-NOOPT-NEXT: movl %ecx, %eax
+; X86-NOOPT-NEXT: mull %edx
+; X86-NOOPT-NEXT: movl %eax, %ecx
+; X86-NOOPT-NEXT: leal (%edi,%edi,4), %edi
+; X86-NOOPT-NEXT: addl $2, %ecx
+; X86-NOOPT-NEXT: adcl %edx, %edi
+; X86-NOOPT-NEXT: movl %esi, %eax
+; X86-NOOPT-NEXT: mull %ecx
+; X86-NOOPT-NEXT: imull %esi, %edi
+; X86-NOOPT-NEXT: addl %edi, %edx
+; X86-NOOPT-NEXT: imull %ebx, %ecx
+; X86-NOOPT-NEXT: addl %ecx, %edx
+; X86-NOOPT-NEXT: popl %esi
+; X86-NOOPT-NEXT: popl %edi
+; X86-NOOPT-NEXT: popl %ebx
+; X86-NOOPT-NEXT: retl
+;
+; HSW-NOOPT-LABEL: test_mul_spec:
+; HSW-NOOPT: # BB#0:
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addq $42, %rcx # sched: [1:0.25]
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
+; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25]
+; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; HSW-NOOPT-NEXT: retq # sched: [1:1.00]
+;
+; JAG-NOOPT-LABEL: test_mul_spec:
+; JAG-NOOPT: # BB#0:
+; JAG-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50]
+; JAG-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50]
+; JAG-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+;
+; X64-SLM-LABEL: test_mul_spec:
+; X64-SLM: # BB#0:
+; X64-SLM-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00]
+; X64-SLM-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00]
+; X64-SLM-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; X64-SLM-NEXT: retq # sched: [4:1.00]
+;
+; SLM-NOOPT-LABEL: test_mul_spec:
+; SLM-NOOPT: # BB#0:
+; SLM-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00]
+; SLM-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00]
+; SLM-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
+; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+ %mul = mul nsw i64 %x, 9
+ %add = add nsw i64 %mul, 42
+ %mul2 = mul nsw i64 %x, 5
+ %add2 = add nsw i64 %mul2, 2
+ %mul3 = mul nsw i64 %add, %add2
+ ret i64 %mul3
+}
diff --git a/test/CodeGen/X86/mul-constant-result.ll b/test/CodeGen/X86/mul-constant-result.ll
new file mode 100644
index 000000000000..65d80a699e24
--- /dev/null
+++ b/test/CodeGen/X86/mul-constant-result.ll
@@ -0,0 +1,1291 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
+
+; Function Attrs: norecurse nounwind readnone uwtable -- mult(%0, %1): %6 is 1 when %1 == 0 or %1 > 1 (signed), otherwise %1; the result is %6 * %0 for %0 in 1..32 (one switch case per constant) and 0 for any other %0. The autogenerated checks below pin the instruction sequence llc emits for each constant.
+define i32 @mult(i32, i32) local_unnamed_addr #0 {
+; X86-LABEL: mult:
+; X86: # BB#0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: .Lcfi0:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .Lcfi1:
+; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: movl $1, %esi
+; X86-NEXT: jg .LBB0_2
+; X86-NEXT: # BB#1:
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: .LBB0_2:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: je .LBB0_4
+; X86-NEXT: # BB#3:
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: .LBB0_4:
+; X86-NEXT: decl %ecx
+; X86-NEXT: cmpl $31, %ecx
+; X86-NEXT: ja .LBB0_39
+; X86-NEXT: # BB#5:
+; X86-NEXT: jmpl *.LJTI0_0(,%ecx,4)
+; X86-NEXT: .LBB0_6:
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_39:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB0_40:
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_7:
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_8:
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_9:
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_10:
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_11:
+; X86-NEXT: leal (,%eax,8), %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_13:
+; X86-NEXT: shll $3, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_14:
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_15:
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_16:
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_17:
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_18:
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_19:
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: jmp .LBB0_20
+; X86-NEXT: .LBB0_21:
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_22:
+; X86-NEXT: shll $4, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_23:
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $4, %ecx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_24:
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_25:
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: shll $2, %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_26:
+; X86-NEXT: shll $2, %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_27:
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: leal (%eax,%ecx,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_28:
+; X86-NEXT: leal (%eax,%eax,4), %ecx
+; X86-NEXT: .LBB0_20:
+; X86-NEXT: leal (%eax,%ecx,4), %ecx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_29:
+; X86-NEXT: leal (%eax,%eax,2), %ecx
+; X86-NEXT: shll $3, %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_30:
+; X86-NEXT: shll $3, %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_31:
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: leal (%eax,%eax,4), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_32:
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_33:
+; X86-NEXT: leal (%eax,%eax,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_34:
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_35:
+; X86-NEXT: leal (%eax,%eax,8), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_36:
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: jmp .LBB0_12
+; X86-NEXT: .LBB0_37:
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shll $5, %ecx
+; X86-NEXT: .LBB0_12:
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; X86-NEXT: .LBB0_38:
+; X86-NEXT: shll $5, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+; (end of the 32-bit expectations for mult; the 64-bit expectations for the same function follow)
+; X64-HSW-LABEL: mult:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; X64-HSW-NEXT: cmpl $1, %esi
+; X64-HSW-NEXT: movl $1, %ecx
+; X64-HSW-NEXT: movl %esi, %eax
+; X64-HSW-NEXT: cmovgl %ecx, %eax
+; X64-HSW-NEXT: testl %esi, %esi
+; X64-HSW-NEXT: cmovel %ecx, %eax
+; X64-HSW-NEXT: addl $-1, %edi
+; X64-HSW-NEXT: cmpl $31, %edi
+; X64-HSW-NEXT: ja .LBB0_36
+; X64-HSW-NEXT: # BB#1:
+; X64-HSW-NEXT: jmpq *.LJTI0_0(,%rdi,8)
+; X64-HSW-NEXT: .LBB0_2:
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_36:
+; X64-HSW-NEXT: xorl %eax, %eax
+; X64-HSW-NEXT: .LBB0_37:
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_3:
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_4:
+; X64-HSW-NEXT: shll $2, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_5:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_6:
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_7:
+; X64-HSW-NEXT: leal (,%rax,8), %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_9:
+; X64-HSW-NEXT: shll $3, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_10:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_11:
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_12:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT: leal (%rax,%rcx,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_13:
+; X64-HSW-NEXT: shll $2, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_14:
+; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_15:
+; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT: jmp .LBB0_16
+; X64-HSW-NEXT: .LBB0_18:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_19:
+; X64-HSW-NEXT: shll $4, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_20:
+; X64-HSW-NEXT: movl %eax, %ecx
+; X64-HSW-NEXT: shll $4, %ecx
+; X64-HSW-NEXT: jmp .LBB0_17
+; X64-HSW-NEXT: .LBB0_21:
+; X64-HSW-NEXT: addl %eax, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_22:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT: shll $2, %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_23:
+; X64-HSW-NEXT: shll $2, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_24:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_25:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx
+; X64-HSW-NEXT: .LBB0_16:
+; X64-HSW-NEXT: leal (%rax,%rcx,4), %ecx
+; X64-HSW-NEXT: jmp .LBB0_17
+; X64-HSW-NEXT: .LBB0_26:
+; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx
+; X64-HSW-NEXT: shll $3, %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_27:
+; X64-HSW-NEXT: shll $3, %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_28:
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_29:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_30:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_31:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT: jmp .LBB0_17
+; X64-HSW-NEXT: .LBB0_32:
+; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
+; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
+; X64-HSW-NEXT: addl %eax, %ecx
+; X64-HSW-NEXT: .LBB0_17:
+; X64-HSW-NEXT: addl %eax, %ecx
+; X64-HSW-NEXT: movl %ecx, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_33:
+; X64-HSW-NEXT: movl %eax, %ecx
+; X64-HSW-NEXT: shll $5, %ecx
+; X64-HSW-NEXT: subl %eax, %ecx
+; X64-HSW-NEXT: jmp .LBB0_8
+; X64-HSW-NEXT: .LBB0_34:
+; X64-HSW-NEXT: movl %eax, %ecx
+; X64-HSW-NEXT: shll $5, %ecx
+; X64-HSW-NEXT: .LBB0_8:
+; X64-HSW-NEXT: subl %eax, %ecx
+; X64-HSW-NEXT: movl %ecx, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: .LBB0_35:
+; X64-HSW-NEXT: shll $5, %eax
+; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; X64-HSW-NEXT: retq
+ %3 = icmp eq i32 %1, 0
+ %4 = icmp sgt i32 %1, 1
+ %5 = or i1 %3, %4
+ %6 = select i1 %5, i32 1, i32 %1
+ switch i32 %0, label %69 [
+ i32 1, label %70
+ i32 2, label %7
+ i32 3, label %9
+ i32 4, label %11
+ i32 5, label %13
+ i32 6, label %15
+ i32 7, label %17
+ i32 8, label %19
+ i32 9, label %21
+ i32 10, label %23
+ i32 11, label %25
+ i32 12, label %27
+ i32 13, label %29
+ i32 14, label %31
+ i32 15, label %33
+ i32 16, label %35
+ i32 17, label %37
+ i32 18, label %39
+ i32 19, label %41
+ i32 20, label %43
+ i32 21, label %45
+ i32 22, label %47
+ i32 23, label %49
+ i32 24, label %51
+ i32 25, label %53
+ i32 26, label %55
+ i32 27, label %57
+ i32 28, label %59
+ i32 29, label %61
+ i32 30, label %63
+ i32 31, label %65
+ i32 32, label %67
+ ]
+
+; <label>:7: ; preds = %2 (switch case 2: %6 * 2, written as shl by 1)
+ %8 = shl nsw i32 %6, 1
+ br label %70
+
+; <label>:9: ; preds = %2 (switch case 3: %6 * 3)
+ %10 = mul nsw i32 %6, 3
+ br label %70
+
+; <label>:11: ; preds = %2 (switch case 4: %6 * 4, written as shl by 2)
+ %12 = shl nsw i32 %6, 2
+ br label %70
+
+; <label>:13: ; preds = %2 (switch case 5: %6 * 5)
+ %14 = mul nsw i32 %6, 5
+ br label %70
+
+; <label>:15: ; preds = %2 (switch case 6: %6 * 6)
+ %16 = mul nsw i32 %6, 6
+ br label %70
+
+; <label>:17: ; preds = %2 (switch case 7: %6 * 7)
+ %18 = mul nsw i32 %6, 7
+ br label %70
+
+; <label>:19: ; preds = %2 (switch case 8: %6 * 8, written as shl by 3)
+ %20 = shl nsw i32 %6, 3
+ br label %70
+
+; <label>:21: ; preds = %2 (switch case 9: %6 * 9)
+ %22 = mul nsw i32 %6, 9
+ br label %70
+
+; <label>:23: ; preds = %2 (switch case 10: %6 * 10)
+ %24 = mul nsw i32 %6, 10
+ br label %70
+
+; <label>:25: ; preds = %2 (switch case 11: %6 * 11)
+ %26 = mul nsw i32 %6, 11
+ br label %70
+
+; <label>:27: ; preds = %2 (switch case 12: %6 * 12)
+ %28 = mul nsw i32 %6, 12
+ br label %70
+
+; <label>:29: ; preds = %2 (switch case 13: %6 * 13)
+ %30 = mul nsw i32 %6, 13
+ br label %70
+
+; <label>:31: ; preds = %2 (switch case 14: %6 * 14)
+ %32 = mul nsw i32 %6, 14
+ br label %70
+
+; <label>:33: ; preds = %2 (switch case 15: %6 * 15)
+ %34 = mul nsw i32 %6, 15
+ br label %70
+
+; <label>:35: ; preds = %2 (switch case 16: %6 * 16, written as shl by 4)
+ %36 = shl nsw i32 %6, 4
+ br label %70
+
+; <label>:37: ; preds = %2 (switch case 17: %6 * 17)
+ %38 = mul nsw i32 %6, 17
+ br label %70
+
+; <label>:39: ; preds = %2 (switch case 18: %6 * 18)
+ %40 = mul nsw i32 %6, 18
+ br label %70
+
+; <label>:41: ; preds = %2 (switch case 19: %6 * 19)
+ %42 = mul nsw i32 %6, 19
+ br label %70
+
+; <label>:43: ; preds = %2 (switch case 20: %6 * 20)
+ %44 = mul nsw i32 %6, 20
+ br label %70
+
+; <label>:45: ; preds = %2 (switch case 21: %6 * 21)
+ %46 = mul nsw i32 %6, 21
+ br label %70
+
+; <label>:47: ; preds = %2 (switch case 22: %6 * 22)
+ %48 = mul nsw i32 %6, 22
+ br label %70
+
+; <label>:49: ; preds = %2 (switch case 23: %6 * 23)
+ %50 = mul nsw i32 %6, 23
+ br label %70
+
+; <label>:51: ; preds = %2 (switch case 24: %6 * 24)
+ %52 = mul nsw i32 %6, 24
+ br label %70
+
+; <label>:53: ; preds = %2 (switch case 25: %6 * 25)
+ %54 = mul nsw i32 %6, 25
+ br label %70
+
+; <label>:55: ; preds = %2 (switch case 26: %6 * 26)
+ %56 = mul nsw i32 %6, 26
+ br label %70
+
+; <label>:57: ; preds = %2 (switch case 27: %6 * 27)
+ %58 = mul nsw i32 %6, 27
+ br label %70
+
+; <label>:59: ; preds = %2 (switch case 28: %6 * 28)
+ %60 = mul nsw i32 %6, 28
+ br label %70
+
+; <label>:61: ; preds = %2 (switch case 29: %6 * 29)
+ %62 = mul nsw i32 %6, 29
+ br label %70
+
+; <label>:63: ; preds = %2 (switch case 30: %6 * 30)
+ %64 = mul nsw i32 %6, 30
+ br label %70
+
+; <label>:65: ; preds = %2 (switch case 31: %6 * 31)
+ %66 = mul nsw i32 %6, 31
+ br label %70
+
+; <label>:67: ; preds = %2 (switch case 32: %6 * 32, written as shl by 5)
+ %68 = shl nsw i32 %6, 5
+ br label %70
+
+; <label>:69: ; preds = %2 (switch default: %0 outside 1..32, the phi below yields 0 for this edge)
+ br label %70
+
+; <label>:70: ; preds = %2, %69, %67, %65, %63, %61, %59, %57, %55, %53, %51, %49, %47, %45, %43, %41, %39, %37, %35, %33, %31, %29, %27, %25, %23, %21, %19, %17, %15, %13, %11, %9, %7 (join block; the direct %2 edge is switch case 1, for which the phi yields %6 unchanged)
+ %71 = phi i32 [ %8, %7 ], [ %10, %9 ], [ %12, %11 ], [ %14, %13 ], [ %16, %15 ], [ %18, %17 ], [ %20, %19 ], [ %22, %21 ], [ %24, %23 ], [ %26, %25 ], [ %28, %27 ], [ %30, %29 ], [ %32, %31 ], [ %34, %33 ], [ %36, %35 ], [ %38, %37 ], [ %40, %39 ], [ %42, %41 ], [ %44, %43 ], [ %46, %45 ], [ %48, %47 ], [ %50, %49 ], [ %52, %51 ], [ %54, %53 ], [ %56, %55 ], [ %58, %57 ], [ %60, %59 ], [ %62, %61 ], [ %64, %63 ], [ %66, %65 ], [ %68, %67 ], [ 0, %69 ], [ %6, %2 ]
+ ret i32 %71
+}
+
+; Function Attrs: norecurse nounwind readnone uwtable
+define i32 @foo() local_unnamed_addr #0 {
+; X86-LABEL: foo:
+; X86: # BB#0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: .Lcfi2:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: pushl %edi
+; X86-NEXT: .Lcfi3:
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: pushl %esi
+; X86-NEXT: .Lcfi4:
+; X86-NEXT: .cfi_def_cfa_offset 16
+; X86-NEXT: .Lcfi5:
+; X86-NEXT: .cfi_offset %esi, -16
+; X86-NEXT: .Lcfi6:
+; X86-NEXT: .cfi_offset %edi, -12
+; X86-NEXT: .Lcfi7:
+; X86-NEXT: .cfi_offset %ebx, -8
+; X86-NEXT: pushl $0
+; X86-NEXT: .Lcfi8:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $1
+; X86-NEXT: .Lcfi9:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi10:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: xorl $1, %esi
+; X86-NEXT: pushl $1
+; X86-NEXT: .Lcfi11:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $2
+; X86-NEXT: .Lcfi12:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi13:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $2, %edi
+; X86-NEXT: pushl $1
+; X86-NEXT: .Lcfi14:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $3
+; X86-NEXT: .Lcfi15:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi16:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $3, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $2
+; X86-NEXT: .Lcfi17:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $4
+; X86-NEXT: .Lcfi18:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi19:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $4, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $2
+; X86-NEXT: .Lcfi20:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $5
+; X86-NEXT: .Lcfi21:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi22:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $5, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $3
+; X86-NEXT: .Lcfi23:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $6
+; X86-NEXT: .Lcfi24:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi25:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $6, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $3
+; X86-NEXT: .Lcfi26:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $7
+; X86-NEXT: .Lcfi27:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi28:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $7, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $4
+; X86-NEXT: .Lcfi29:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $8
+; X86-NEXT: .Lcfi30:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi31:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $8, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $4
+; X86-NEXT: .Lcfi32:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $9
+; X86-NEXT: .Lcfi33:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi34:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $9, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $5
+; X86-NEXT: .Lcfi35:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $10
+; X86-NEXT: .Lcfi36:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi37:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $10, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $5
+; X86-NEXT: .Lcfi38:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $11
+; X86-NEXT: .Lcfi39:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi40:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $11, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $6
+; X86-NEXT: .Lcfi41:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $12
+; X86-NEXT: .Lcfi42:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi43:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $12, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $6
+; X86-NEXT: .Lcfi44:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $13
+; X86-NEXT: .Lcfi45:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi46:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $13, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $7
+; X86-NEXT: .Lcfi47:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $14
+; X86-NEXT: .Lcfi48:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi49:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $14, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $7
+; X86-NEXT: .Lcfi50:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $15
+; X86-NEXT: .Lcfi51:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi52:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $15, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $8
+; X86-NEXT: .Lcfi53:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $16
+; X86-NEXT: .Lcfi54:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi55:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $16, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $8
+; X86-NEXT: .Lcfi56:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $17
+; X86-NEXT: .Lcfi57:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi58:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $17, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $9
+; X86-NEXT: .Lcfi59:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $18
+; X86-NEXT: .Lcfi60:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi61:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $18, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $9
+; X86-NEXT: .Lcfi62:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $19
+; X86-NEXT: .Lcfi63:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi64:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $19, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $10
+; X86-NEXT: .Lcfi65:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $20
+; X86-NEXT: .Lcfi66:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi67:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $20, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $10
+; X86-NEXT: .Lcfi68:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $21
+; X86-NEXT: .Lcfi69:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi70:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $21, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $11
+; X86-NEXT: .Lcfi71:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $22
+; X86-NEXT: .Lcfi72:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi73:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $22, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $11
+; X86-NEXT: .Lcfi74:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $23
+; X86-NEXT: .Lcfi75:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi76:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $23, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $12
+; X86-NEXT: .Lcfi77:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $24
+; X86-NEXT: .Lcfi78:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi79:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $24, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $12
+; X86-NEXT: .Lcfi80:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $25
+; X86-NEXT: .Lcfi81:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi82:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $25, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $13
+; X86-NEXT: .Lcfi83:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $26
+; X86-NEXT: .Lcfi84:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi85:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $26, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $13
+; X86-NEXT: .Lcfi86:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $27
+; X86-NEXT: .Lcfi87:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi88:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $27, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $14
+; X86-NEXT: .Lcfi89:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $28
+; X86-NEXT: .Lcfi90:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi91:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $28, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $14
+; X86-NEXT: .Lcfi92:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $29
+; X86-NEXT: .Lcfi93:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi94:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $29, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: pushl $15
+; X86-NEXT: .Lcfi95:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $30
+; X86-NEXT: .Lcfi96:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi97:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: xorl $30, %edi
+; X86-NEXT: orl %ebx, %edi
+; X86-NEXT: pushl $15
+; X86-NEXT: .Lcfi98:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $31
+; X86-NEXT: .Lcfi99:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi100:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl $31, %ebx
+; X86-NEXT: orl %edi, %ebx
+; X86-NEXT: orl %esi, %ebx
+; X86-NEXT: pushl $16
+; X86-NEXT: .Lcfi101:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: pushl $32
+; X86-NEXT: .Lcfi102:
+; X86-NEXT: .cfi_adjust_cfa_offset 4
+; X86-NEXT: calll mult
+; X86-NEXT: addl $8, %esp
+; X86-NEXT: .Lcfi103:
+; X86-NEXT: .cfi_adjust_cfa_offset -8
+; X86-NEXT: xorl $32, %eax
+; X86-NEXT: orl %ebx, %eax
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: jne .LBB1_2
+; X86-NEXT: # BB#1:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB1_2:
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X64-HSW-LABEL: foo:
+; X64-HSW: # BB#0:
+; X64-HSW-NEXT: pushq %rbp
+; X64-HSW-NEXT: .Lcfi0:
+; X64-HSW-NEXT: .cfi_def_cfa_offset 16
+; X64-HSW-NEXT: pushq %r15
+; X64-HSW-NEXT: .Lcfi1:
+; X64-HSW-NEXT: .cfi_def_cfa_offset 24
+; X64-HSW-NEXT: pushq %r14
+; X64-HSW-NEXT: .Lcfi2:
+; X64-HSW-NEXT: .cfi_def_cfa_offset 32
+; X64-HSW-NEXT: pushq %r12
+; X64-HSW-NEXT: .Lcfi3:
+; X64-HSW-NEXT: .cfi_def_cfa_offset 40
+; X64-HSW-NEXT: pushq %rbx
+; X64-HSW-NEXT: .Lcfi4:
+; X64-HSW-NEXT: .cfi_def_cfa_offset 48
+; X64-HSW-NEXT: .Lcfi5:
+; X64-HSW-NEXT: .cfi_offset %rbx, -48
+; X64-HSW-NEXT: .Lcfi6:
+; X64-HSW-NEXT: .cfi_offset %r12, -40
+; X64-HSW-NEXT: .Lcfi7:
+; X64-HSW-NEXT: .cfi_offset %r14, -32
+; X64-HSW-NEXT: .Lcfi8:
+; X64-HSW-NEXT: .cfi_offset %r15, -24
+; X64-HSW-NEXT: .Lcfi9:
+; X64-HSW-NEXT: .cfi_offset %rbp, -16
+; X64-HSW-NEXT: xorl %r12d, %r12d
+; X64-HSW-NEXT: movl $1, %edi
+; X64-HSW-NEXT: xorl %esi, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $1, %ebx
+; X64-HSW-NEXT: movl $2, %edi
+; X64-HSW-NEXT: movl $1, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $2, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $3, %edi
+; X64-HSW-NEXT: movl $1, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r14d
+; X64-HSW-NEXT: xorl $3, %r14d
+; X64-HSW-NEXT: movl $4, %edi
+; X64-HSW-NEXT: movl $2, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $4, %ebx
+; X64-HSW-NEXT: orl %r14d, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $5, %edi
+; X64-HSW-NEXT: movl $2, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r14d
+; X64-HSW-NEXT: xorl $5, %r14d
+; X64-HSW-NEXT: movl $6, %edi
+; X64-HSW-NEXT: movl $3, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $6, %ebp
+; X64-HSW-NEXT: orl %r14d, %ebp
+; X64-HSW-NEXT: movl $7, %edi
+; X64-HSW-NEXT: movl $3, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r14d
+; X64-HSW-NEXT: xorl $7, %r14d
+; X64-HSW-NEXT: orl %ebp, %r14d
+; X64-HSW-NEXT: orl %ebx, %r14d
+; X64-HSW-NEXT: movl $8, %edi
+; X64-HSW-NEXT: movl $4, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $8, %ebx
+; X64-HSW-NEXT: movl $9, %edi
+; X64-HSW-NEXT: movl $4, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $9, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $10, %edi
+; X64-HSW-NEXT: movl $5, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $10, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $11, %edi
+; X64-HSW-NEXT: movl $5, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r15d
+; X64-HSW-NEXT: xorl $11, %r15d
+; X64-HSW-NEXT: orl %ebx, %r15d
+; X64-HSW-NEXT: orl %r14d, %r15d
+; X64-HSW-NEXT: movl $12, %edi
+; X64-HSW-NEXT: movl $6, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $12, %ebx
+; X64-HSW-NEXT: movl $13, %edi
+; X64-HSW-NEXT: movl $6, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $13, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $14, %edi
+; X64-HSW-NEXT: movl $7, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $14, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $15, %edi
+; X64-HSW-NEXT: movl $7, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $15, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $16, %edi
+; X64-HSW-NEXT: movl $8, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r14d
+; X64-HSW-NEXT: xorl $16, %r14d
+; X64-HSW-NEXT: orl %ebp, %r14d
+; X64-HSW-NEXT: orl %r15d, %r14d
+; X64-HSW-NEXT: movl $17, %edi
+; X64-HSW-NEXT: movl $8, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $17, %ebp
+; X64-HSW-NEXT: movl $18, %edi
+; X64-HSW-NEXT: movl $9, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $18, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $19, %edi
+; X64-HSW-NEXT: movl $9, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $19, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $20, %edi
+; X64-HSW-NEXT: movl $10, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $20, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $21, %edi
+; X64-HSW-NEXT: movl $10, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $21, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $22, %edi
+; X64-HSW-NEXT: movl $11, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r15d
+; X64-HSW-NEXT: xorl $22, %r15d
+; X64-HSW-NEXT: orl %ebp, %r15d
+; X64-HSW-NEXT: orl %r14d, %r15d
+; X64-HSW-NEXT: movl $23, %edi
+; X64-HSW-NEXT: movl $11, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $23, %ebp
+; X64-HSW-NEXT: movl $24, %edi
+; X64-HSW-NEXT: movl $12, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $24, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $25, %edi
+; X64-HSW-NEXT: movl $12, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $25, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $26, %edi
+; X64-HSW-NEXT: movl $13, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $26, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $27, %edi
+; X64-HSW-NEXT: movl $13, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $27, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: movl $28, %edi
+; X64-HSW-NEXT: movl $14, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $28, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $29, %edi
+; X64-HSW-NEXT: movl $14, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebp
+; X64-HSW-NEXT: xorl $29, %ebp
+; X64-HSW-NEXT: orl %ebx, %ebp
+; X64-HSW-NEXT: orl %r15d, %ebp
+; X64-HSW-NEXT: movl $30, %edi
+; X64-HSW-NEXT: movl $15, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %r14d
+; X64-HSW-NEXT: xorl $30, %r14d
+; X64-HSW-NEXT: movl $31, %edi
+; X64-HSW-NEXT: movl $15, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: movl %eax, %ebx
+; X64-HSW-NEXT: xorl $31, %ebx
+; X64-HSW-NEXT: orl %r14d, %ebx
+; X64-HSW-NEXT: orl %ebp, %ebx
+; X64-HSW-NEXT: movl $32, %edi
+; X64-HSW-NEXT: movl $16, %esi
+; X64-HSW-NEXT: callq mult
+; X64-HSW-NEXT: xorl $32, %eax
+; X64-HSW-NEXT: orl %ebx, %eax
+; X64-HSW-NEXT: movl $-1, %eax
+; X64-HSW-NEXT: cmovel %r12d, %eax
+; X64-HSW-NEXT: popq %rbx
+; X64-HSW-NEXT: popq %r12
+; X64-HSW-NEXT: popq %r14
+; X64-HSW-NEXT: popq %r15
+; X64-HSW-NEXT: popq %rbp
+; X64-HSW-NEXT: retq
+ %1 = tail call i32 @mult(i32 1, i32 0)
+ %2 = icmp ne i32 %1, 1
+ %3 = tail call i32 @mult(i32 2, i32 1)
+ %4 = icmp ne i32 %3, 2
+ %5 = or i1 %2, %4
+ %6 = tail call i32 @mult(i32 3, i32 1)
+ %7 = icmp ne i32 %6, 3
+ %8 = or i1 %5, %7
+ %9 = tail call i32 @mult(i32 4, i32 2)
+ %10 = icmp ne i32 %9, 4
+ %11 = or i1 %8, %10
+ %12 = tail call i32 @mult(i32 5, i32 2)
+ %13 = icmp ne i32 %12, 5
+ %14 = or i1 %11, %13
+ %15 = tail call i32 @mult(i32 6, i32 3)
+ %16 = icmp ne i32 %15, 6
+ %17 = or i1 %14, %16
+ %18 = tail call i32 @mult(i32 7, i32 3)
+ %19 = icmp ne i32 %18, 7
+ %20 = or i1 %17, %19
+ %21 = tail call i32 @mult(i32 8, i32 4)
+ %22 = icmp ne i32 %21, 8
+ %23 = or i1 %20, %22
+ %24 = tail call i32 @mult(i32 9, i32 4)
+ %25 = icmp ne i32 %24, 9
+ %26 = or i1 %23, %25
+ %27 = tail call i32 @mult(i32 10, i32 5)
+ %28 = icmp ne i32 %27, 10
+ %29 = or i1 %26, %28
+ %30 = tail call i32 @mult(i32 11, i32 5)
+ %31 = icmp ne i32 %30, 11
+ %32 = or i1 %29, %31
+ %33 = tail call i32 @mult(i32 12, i32 6)
+ %34 = icmp ne i32 %33, 12
+ %35 = or i1 %32, %34
+ %36 = tail call i32 @mult(i32 13, i32 6)
+ %37 = icmp ne i32 %36, 13
+ %38 = or i1 %35, %37
+ %39 = tail call i32 @mult(i32 14, i32 7)
+ %40 = icmp ne i32 %39, 14
+ %41 = or i1 %38, %40
+ %42 = tail call i32 @mult(i32 15, i32 7)
+ %43 = icmp ne i32 %42, 15
+ %44 = or i1 %41, %43
+ %45 = tail call i32 @mult(i32 16, i32 8)
+ %46 = icmp ne i32 %45, 16
+ %47 = or i1 %44, %46
+ %48 = tail call i32 @mult(i32 17, i32 8)
+ %49 = icmp ne i32 %48, 17
+ %50 = or i1 %47, %49
+ %51 = tail call i32 @mult(i32 18, i32 9)
+ %52 = icmp ne i32 %51, 18
+ %53 = or i1 %50, %52
+ %54 = tail call i32 @mult(i32 19, i32 9)
+ %55 = icmp ne i32 %54, 19
+ %56 = or i1 %53, %55
+ %57 = tail call i32 @mult(i32 20, i32 10)
+ %58 = icmp ne i32 %57, 20
+ %59 = or i1 %56, %58
+ %60 = tail call i32 @mult(i32 21, i32 10)
+ %61 = icmp ne i32 %60, 21
+ %62 = or i1 %59, %61
+ %63 = tail call i32 @mult(i32 22, i32 11)
+ %64 = icmp ne i32 %63, 22
+ %65 = or i1 %62, %64
+ %66 = tail call i32 @mult(i32 23, i32 11)
+ %67 = icmp ne i32 %66, 23
+ %68 = or i1 %65, %67
+ %69 = tail call i32 @mult(i32 24, i32 12)
+ %70 = icmp ne i32 %69, 24
+ %71 = or i1 %68, %70
+ %72 = tail call i32 @mult(i32 25, i32 12)
+ %73 = icmp ne i32 %72, 25
+ %74 = or i1 %71, %73
+ %75 = tail call i32 @mult(i32 26, i32 13)
+ %76 = icmp ne i32 %75, 26
+ %77 = or i1 %74, %76
+ %78 = tail call i32 @mult(i32 27, i32 13)
+ %79 = icmp ne i32 %78, 27
+ %80 = or i1 %77, %79
+ %81 = tail call i32 @mult(i32 28, i32 14)
+ %82 = icmp ne i32 %81, 28
+ %83 = or i1 %80, %82
+ %84 = tail call i32 @mult(i32 29, i32 14)
+ %85 = icmp ne i32 %84, 29
+ %86 = or i1 %83, %85
+ %87 = tail call i32 @mult(i32 30, i32 15)
+ %88 = icmp ne i32 %87, 30
+ %89 = or i1 %86, %88
+ %90 = tail call i32 @mult(i32 31, i32 15)
+ %91 = icmp ne i32 %90, 31
+ %92 = or i1 %89, %91
+ %93 = tail call i32 @mult(i32 32, i32 16)
+ %94 = icmp ne i32 %93, 32
+ %95 = or i1 %92, %94
+ %96 = sext i1 %95 to i32
+ ret i32 %96
+}
+
+attributes #0 = { norecurse nounwind readnone uwtable }
diff --git a/test/CodeGen/X86/nontemporal-loads.ll b/test/CodeGen/X86/nontemporal-loads.ll
index eaab26ef9547..3c916fd38c6c 100644
--- a/test/CodeGen/X86/nontemporal-loads.ll
+++ b/test/CodeGen/X86/nontemporal-loads.ll
@@ -168,7 +168,9 @@ define <8 x float> @test_v8f32(<8 x float>* %src) {
;
; AVX1-LABEL: test_v8f32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8f32:
@@ -199,7 +201,9 @@ define <8 x i32> @test_v8i32(<8 x i32>* %src) {
;
; AVX1-LABEL: test_v8i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i32:
@@ -240,7 +244,9 @@ define <4 x double> @test_v4f64(<4 x double>* %src) {
;
; AVX1-LABEL: test_v4f64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4f64:
@@ -271,7 +277,9 @@ define <4 x i64> @test_v4i64(<4 x i64>* %src) {
;
; AVX1-LABEL: test_v4i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4i64:
@@ -302,7 +310,9 @@ define <16 x i16> @test_v16i16(<16 x i16>* %src) {
;
; AVX1-LABEL: test_v16i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i16:
@@ -333,7 +343,9 @@ define <32 x i8> @test_v32i8(<32 x i8>* %src) {
;
; AVX1-LABEL: test_v32i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v32i8:
@@ -370,8 +382,12 @@ define <16 x float> @test_v16f32(<16 x float>* %src) {
;
; AVX1-LABEL: test_v16f32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16f32:
@@ -407,8 +423,12 @@ define <16 x i32> @test_v16i32(<16 x i32>* %src) {
;
; AVX1-LABEL: test_v16i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v16i32:
@@ -444,8 +464,12 @@ define <8 x double> @test_v8f64(<8 x double>* %src) {
;
; AVX1-LABEL: test_v8f64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8f64:
@@ -481,8 +505,12 @@ define <8 x i64> @test_v8i64(<8 x i64>* %src) {
;
; AVX1-LABEL: test_v8i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i64:
@@ -518,8 +546,12 @@ define <32 x i16> @test_v32i16(<32 x i16>* %src) {
;
; AVX1-LABEL: test_v32i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v32i16:
@@ -567,8 +599,12 @@ define <64 x i8> @test_v64i8(<64 x i8>* %src) {
;
; AVX1-LABEL: test_v64i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v64i8:
@@ -601,19 +637,27 @@ define <64 x i8> @test_v64i8(<64 x i8>* %src) {
; Check cases where the load would be folded.
define <4 x float> @test_arg_v4f32(<4 x float> %arg, <4 x float>* %src) {
-; SSE-LABEL: test_arg_v4f32:
-; SSE: # BB#0:
-; SSE-NEXT: addps (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v4f32:
+; SSE2: # BB#0:
+; SSE2-NEXT: addps (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v4f32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa (%rdi), %xmm1
+; SSE41-NEXT: addps %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_arg_v4f32:
; AVX: # BB#0:
-; AVX-NEXT: vaddps (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_arg_v4f32:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddps (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x float>, <4 x float>* %src, align 16, !nontemporal !1
%2 = fadd <4 x float> %arg, %1
@@ -621,19 +665,27 @@ define <4 x float> @test_arg_v4f32(<4 x float> %arg, <4 x float>* %src) {
}
define <4 x i32> @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %src) {
-; SSE-LABEL: test_arg_v4i32:
-; SSE: # BB#0:
-; SSE-NEXT: paddd (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddd (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa (%rdi), %xmm1
+; SSE41-NEXT: paddd %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_arg_v4i32:
; AVX: # BB#0:
-; AVX-NEXT: vpaddd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_arg_v4i32:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddd (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %src, align 16, !nontemporal !1
%2 = add <4 x i32> %arg, %1
@@ -641,19 +693,27 @@ define <4 x i32> @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %src) {
}
define <2 x double> @test_arg_v2f64(<2 x double> %arg, <2 x double>* %src) {
-; SSE-LABEL: test_arg_v2f64:
-; SSE: # BB#0:
-; SSE-NEXT: addpd (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v2f64:
+; SSE2: # BB#0:
+; SSE2-NEXT: addpd (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v2f64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa (%rdi), %xmm1
+; SSE41-NEXT: addpd %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_arg_v2f64:
; AVX: # BB#0:
-; AVX-NEXT: vaddpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_arg_v2f64:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddpd (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double>* %src, align 16, !nontemporal !1
%2 = fadd <2 x double> %arg, %1
@@ -661,19 +721,27 @@ define <2 x double> @test_arg_v2f64(<2 x double> %arg, <2 x double>* %src) {
}
define <2 x i64> @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %src) {
-; SSE-LABEL: test_arg_v2i64:
-; SSE: # BB#0:
-; SSE-NEXT: paddq (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v2i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddq (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v2i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa (%rdi), %xmm1
+; SSE41-NEXT: paddq %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_arg_v2i64:
; AVX: # BB#0:
-; AVX-NEXT: vpaddq (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_arg_v2i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddq (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <2 x i64>, <2 x i64>* %src, align 16, !nontemporal !1
%2 = add <2 x i64> %arg, %1
@@ -681,19 +749,27 @@ define <2 x i64> @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %src) {
}
define <8 x i16> @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %src) {
-; SSE-LABEL: test_arg_v8i16:
-; SSE: # BB#0:
-; SSE-NEXT: paddw (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddw (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa (%rdi), %xmm1
+; SSE41-NEXT: paddw %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_arg_v8i16:
; AVX: # BB#0:
-; AVX-NEXT: vpaddw (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_arg_v8i16:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddw (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %src, align 16, !nontemporal !1
%2 = add <8 x i16> %arg, %1
@@ -701,19 +777,27 @@ define <8 x i16> @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %src) {
}
define <16 x i8> @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %src) {
-; SSE-LABEL: test_arg_v16i8:
-; SSE: # BB#0:
-; SSE-NEXT: paddb (%rdi), %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v16i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddb (%rdi), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v16i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa (%rdi), %xmm1
+; SSE41-NEXT: paddb %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: test_arg_v16i8:
; AVX: # BB#0:
-; AVX-NEXT: vpaddb (%rdi), %xmm0, %xmm0
+; AVX-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_arg_v16i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddb (%rdi), %xmm0, %xmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %src, align 16, !nontemporal !1
%2 = add <16 x i8> %arg, %1
@@ -723,20 +807,38 @@ define <16 x i8> @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %src) {
; And now YMM versions.
define <8 x float> @test_arg_v8f32(<8 x float> %arg, <8 x float>* %src) {
-; SSE-LABEL: test_arg_v8f32:
-; SSE: # BB#0:
-; SSE-NEXT: addps (%rdi), %xmm0
-; SSE-NEXT: addps 16(%rdi), %xmm1
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v8f32:
+; SSE2: # BB#0:
+; SSE2-NEXT: addps (%rdi), %xmm0
+; SSE2-NEXT: addps 16(%rdi), %xmm1
+; SSE2-NEXT: retq
;
-; AVX-LABEL: test_arg_v8f32:
-; AVX: # BB#0:
-; AVX-NEXT: vaddps (%rdi), %ymm0, %ymm0
-; AVX-NEXT: retq
+; SSE41-LABEL: test_arg_v8f32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT: movntdqa (%rdi), %xmm3
+; SSE41-NEXT: addps %xmm3, %xmm0
+; SSE41-NEXT: addps %xmm2, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: test_arg_v8f32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_arg_v8f32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v8f32:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddps (%rdi), %ymm0, %ymm0
+; AVX512-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float>* %src, align 32, !nontemporal !1
%2 = fadd <8 x float> %arg, %1
@@ -744,51 +846,90 @@ define <8 x float> @test_arg_v8f32(<8 x float> %arg, <8 x float>* %src) {
}
define <8 x i32> @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %src) {
-; SSE-LABEL: test_arg_v8i32:
-; SSE: # BB#0:
-; SSE-NEXT: paddd (%rdi), %xmm0
-; SSE-NEXT: paddd 16(%rdi), %xmm1
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v8i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddd (%rdi), %xmm0
+; SSE2-NEXT: paddd 16(%rdi), %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v8i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT: movntdqa (%rdi), %xmm3
+; SSE41-NEXT: paddd %xmm3, %xmm0
+; SSE41-NEXT: paddd %xmm2, %xmm1
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v8i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpaddd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v8i32:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddd (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
-; AVX512-LABEL: test_arg_v8i32:
-; AVX512: # BB#0:
-; AVX512-NEXT: vpaddd (%rdi), %ymm0, %ymm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: test_arg_v8i32:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512F-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: test_arg_v8i32:
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512BW-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: retq
+;
+; AVX512VL-LABEL: test_arg_v8i32:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpaddd (%rdi), %ymm0, %ymm0
+; AVX512VL-NEXT: retq
%1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1
%2 = add <8 x i32> %arg, %1
ret <8 x i32> %2
}
define <4 x double> @test_arg_v4f64(<4 x double> %arg, <4 x double>* %src) {
-; SSE-LABEL: test_arg_v4f64:
-; SSE: # BB#0:
-; SSE-NEXT: addpd (%rdi), %xmm0
-; SSE-NEXT: addpd 16(%rdi), %xmm1
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v4f64:
+; SSE2: # BB#0:
+; SSE2-NEXT: addpd (%rdi), %xmm0
+; SSE2-NEXT: addpd 16(%rdi), %xmm1
+; SSE2-NEXT: retq
;
-; AVX-LABEL: test_arg_v4f64:
-; AVX: # BB#0:
-; AVX-NEXT: vaddpd (%rdi), %ymm0, %ymm0
-; AVX-NEXT: retq
+; SSE41-LABEL: test_arg_v4f64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT: movntdqa (%rdi), %xmm3
+; SSE41-NEXT: addpd %xmm3, %xmm0
+; SSE41-NEXT: addpd %xmm2, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: test_arg_v4f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_arg_v4f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v4f64:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddpd (%rdi), %ymm0, %ymm0
+; AVX512-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x double>, <4 x double>* %src, align 32, !nontemporal !1
%2 = fadd <4 x double> %arg, %1
@@ -796,30 +937,40 @@ define <4 x double> @test_arg_v4f64(<4 x double> %arg, <4 x double>* %src) {
}
define <4 x i64> @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %src) {
-; SSE-LABEL: test_arg_v4i64:
-; SSE: # BB#0:
-; SSE-NEXT: paddq (%rdi), %xmm0
-; SSE-NEXT: paddq 16(%rdi), %xmm1
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v4i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddq (%rdi), %xmm0
+; SSE2-NEXT: paddq 16(%rdi), %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v4i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT: movntdqa (%rdi), %xmm3
+; SSE41-NEXT: paddq %xmm3, %xmm0
+; SSE41-NEXT: paddq %xmm2, %xmm1
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v4i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v4i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddq (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v4i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddq (%rdi), %ymm0, %ymm0
+; AVX512-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <4 x i64>, <4 x i64>* %src, align 32, !nontemporal !1
%2 = add <4 x i64> %arg, %1
@@ -827,30 +978,40 @@ define <4 x i64> @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %src) {
}
define <16 x i16> @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %src) {
-; SSE-LABEL: test_arg_v16i16:
-; SSE: # BB#0:
-; SSE-NEXT: paddw (%rdi), %xmm0
-; SSE-NEXT: paddw 16(%rdi), %xmm1
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v16i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddw (%rdi), %xmm0
+; SSE2-NEXT: paddw 16(%rdi), %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v16i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT: movntdqa (%rdi), %xmm3
+; SSE41-NEXT: paddw %xmm3, %xmm0
+; SSE41-NEXT: paddw %xmm2, %xmm1
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v16i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpaddw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v16i16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddw (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v16i16:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddw (%rdi), %ymm0, %ymm0
+; AVX512-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <16 x i16>, <16 x i16>* %src, align 32, !nontemporal !1
%2 = add <16 x i16> %arg, %1
@@ -858,30 +1019,40 @@ define <16 x i16> @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %src) {
}
define <32 x i8> @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %src) {
-; SSE-LABEL: test_arg_v32i8:
-; SSE: # BB#0:
-; SSE-NEXT: paddb (%rdi), %xmm0
-; SSE-NEXT: paddb 16(%rdi), %xmm1
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v32i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddb (%rdi), %xmm0
+; SSE2-NEXT: paddb 16(%rdi), %xmm1
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v32i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm2
+; SSE41-NEXT: movntdqa (%rdi), %xmm3
+; SSE41-NEXT: paddb %xmm3, %xmm0
+; SSE41-NEXT: paddb %xmm2, %xmm1
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v32i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm1
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v32i8:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddb (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v32i8:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddb (%rdi), %ymm0, %ymm0
+; AVX512-NEXT: vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%1 = load <32 x i8>, <32 x i8>* %src, align 32, !nontemporal !1
%2 = add <32 x i8> %arg, %1
@@ -891,23 +1062,50 @@ define <32 x i8> @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %src) {
; And now ZMM versions.
define <16 x float> @test_arg_v16f32(<16 x float> %arg, <16 x float>* %src) {
-; SSE-LABEL: test_arg_v16f32:
-; SSE: # BB#0:
-; SSE-NEXT: addps (%rdi), %xmm0
-; SSE-NEXT: addps 16(%rdi), %xmm1
-; SSE-NEXT: addps 32(%rdi), %xmm2
-; SSE-NEXT: addps 48(%rdi), %xmm3
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v16f32:
+; SSE2: # BB#0:
+; SSE2-NEXT: addps (%rdi), %xmm0
+; SSE2-NEXT: addps 16(%rdi), %xmm1
+; SSE2-NEXT: addps 32(%rdi), %xmm2
+; SSE2-NEXT: addps 48(%rdi), %xmm3
+; SSE2-NEXT: retq
;
-; AVX-LABEL: test_arg_v16f32:
-; AVX: # BB#0:
-; AVX-NEXT: vaddps (%rdi), %ymm0, %ymm0
-; AVX-NEXT: vaddps 32(%rdi), %ymm1, %ymm1
-; AVX-NEXT: retq
+; SSE41-LABEL: test_arg_v16f32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT: movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT: movntdqa (%rdi), %xmm7
+; SSE41-NEXT: addps %xmm7, %xmm0
+; SSE41-NEXT: addps %xmm6, %xmm1
+; SSE41-NEXT: addps %xmm5, %xmm2
+; SSE41-NEXT: addps %xmm4, %xmm3
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: test_arg_v16f32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm3
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: vaddps %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_arg_v16f32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT: vaddps %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v16f32:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddps (%rdi), %zmm0, %zmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %zmm1
+; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
%1 = load <16 x float>, <16 x float>* %src, align 64, !nontemporal !1
%2 = fadd <16 x float> %arg, %1
@@ -915,39 +1113,54 @@ define <16 x float> @test_arg_v16f32(<16 x float> %arg, <16 x float>* %src) {
}
define <16 x i32> @test_arg_v16i32(<16 x i32> %arg, <16 x i32>* %src) {
-; SSE-LABEL: test_arg_v16i32:
-; SSE: # BB#0:
-; SSE-NEXT: paddd (%rdi), %xmm0
-; SSE-NEXT: paddd 16(%rdi), %xmm1
-; SSE-NEXT: paddd 32(%rdi), %xmm2
-; SSE-NEXT: paddd 48(%rdi), %xmm3
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v16i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddd (%rdi), %xmm0
+; SSE2-NEXT: paddd 16(%rdi), %xmm1
+; SSE2-NEXT: paddd 32(%rdi), %xmm2
+; SSE2-NEXT: paddd 48(%rdi), %xmm3
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v16i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT: movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT: movntdqa (%rdi), %xmm7
+; SSE41-NEXT: paddd %xmm7, %xmm0
+; SSE41-NEXT: paddd %xmm6, %xmm1
+; SSE41-NEXT: paddd %xmm5, %xmm2
+; SSE41-NEXT: paddd %xmm4, %xmm3
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v16i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm2
-; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm4
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpaddd %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpaddd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v16i32:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddd (%rdi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddd 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT: vpaddd %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v16i32:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddd (%rdi), %zmm0, %zmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %zmm1
+; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
%1 = load <16 x i32>, <16 x i32>* %src, align 64, !nontemporal !1
%2 = add <16 x i32> %arg, %1
@@ -955,23 +1168,50 @@ define <16 x i32> @test_arg_v16i32(<16 x i32> %arg, <16 x i32>* %src) {
}
define <8 x double> @test_arg_v8f64(<8 x double> %arg, <8 x double>* %src) {
-; SSE-LABEL: test_arg_v8f64:
-; SSE: # BB#0:
-; SSE-NEXT: addpd (%rdi), %xmm0
-; SSE-NEXT: addpd 16(%rdi), %xmm1
-; SSE-NEXT: addpd 32(%rdi), %xmm2
-; SSE-NEXT: addpd 48(%rdi), %xmm3
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v8f64:
+; SSE2: # BB#0:
+; SSE2-NEXT: addpd (%rdi), %xmm0
+; SSE2-NEXT: addpd 16(%rdi), %xmm1
+; SSE2-NEXT: addpd 32(%rdi), %xmm2
+; SSE2-NEXT: addpd 48(%rdi), %xmm3
+; SSE2-NEXT: retq
;
-; AVX-LABEL: test_arg_v8f64:
-; AVX: # BB#0:
-; AVX-NEXT: vaddpd (%rdi), %ymm0, %ymm0
-; AVX-NEXT: vaddpd 32(%rdi), %ymm1, %ymm1
-; AVX-NEXT: retq
+; SSE41-LABEL: test_arg_v8f64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT: movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT: movntdqa (%rdi), %xmm7
+; SSE41-NEXT: addpd %xmm7, %xmm0
+; SSE41-NEXT: addpd %xmm6, %xmm1
+; SSE41-NEXT: addpd %xmm5, %xmm2
+; SSE41-NEXT: addpd %xmm4, %xmm3
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: test_arg_v8f64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm3
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: vaddpd %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_arg_v8f64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT: vaddpd %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v8f64:
; AVX512: # BB#0:
-; AVX512-NEXT: vaddpd (%rdi), %zmm0, %zmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %zmm1
+; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
%1 = load <8 x double>, <8 x double>* %src, align 64, !nontemporal !1
%2 = fadd <8 x double> %arg, %1
@@ -979,39 +1219,54 @@ define <8 x double> @test_arg_v8f64(<8 x double> %arg, <8 x double>* %src) {
}
define <8 x i64> @test_arg_v8i64(<8 x i64> %arg, <8 x i64>* %src) {
-; SSE-LABEL: test_arg_v8i64:
-; SSE: # BB#0:
-; SSE-NEXT: paddq (%rdi), %xmm0
-; SSE-NEXT: paddq 16(%rdi), %xmm1
-; SSE-NEXT: paddq 32(%rdi), %xmm2
-; SSE-NEXT: paddq 48(%rdi), %xmm3
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v8i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddq (%rdi), %xmm0
+; SSE2-NEXT: paddq 16(%rdi), %xmm1
+; SSE2-NEXT: paddq 32(%rdi), %xmm2
+; SSE2-NEXT: paddq 48(%rdi), %xmm3
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v8i64:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT: movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT: movntdqa (%rdi), %xmm7
+; SSE41-NEXT: paddq %xmm7, %xmm0
+; SSE41-NEXT: paddq %xmm6, %xmm1
+; SSE41-NEXT: paddq %xmm5, %xmm2
+; SSE41-NEXT: paddq %xmm4, %xmm3
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v8i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm2
-; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpaddq %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vpaddq %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm4
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpaddq %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v8i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddq (%rdi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddq 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT: vpaddq %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test_arg_v8i64:
; AVX512: # BB#0:
-; AVX512-NEXT: vpaddq (%rdi), %zmm0, %zmm0
+; AVX512-NEXT: vmovntdqa (%rdi), %zmm1
+; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
%1 = load <8 x i64>, <8 x i64>* %src, align 64, !nontemporal !1
%2 = add <8 x i64> %arg, %1
@@ -1019,51 +1274,70 @@ define <8 x i64> @test_arg_v8i64(<8 x i64> %arg, <8 x i64>* %src) {
}
define <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) {
-; SSE-LABEL: test_arg_v32i16:
-; SSE: # BB#0:
-; SSE-NEXT: paddw (%rdi), %xmm0
-; SSE-NEXT: paddw 16(%rdi), %xmm1
-; SSE-NEXT: paddw 32(%rdi), %xmm2
-; SSE-NEXT: paddw 48(%rdi), %xmm3
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v32i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddw (%rdi), %xmm0
+; SSE2-NEXT: paddw 16(%rdi), %xmm1
+; SSE2-NEXT: paddw 32(%rdi), %xmm2
+; SSE2-NEXT: paddw 48(%rdi), %xmm3
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v32i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT: movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT: movntdqa (%rdi), %xmm7
+; SSE41-NEXT: paddw %xmm7, %xmm0
+; SSE41-NEXT: paddw %xmm6, %xmm1
+; SSE41-NEXT: paddw %xmm5, %xmm2
+; SSE41-NEXT: paddw %xmm4, %xmm3
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v32i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm2
-; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpaddw %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vpaddw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm4
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpaddw %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpaddw %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpaddw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v32i16:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddw (%rdi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT: vpaddw %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_arg_v32i16:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpaddw (%rdi), %ymm0, %ymm0
-; AVX512F-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1
+; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX512F-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX512F-NEXT: vpaddw %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_arg_v32i16:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpaddw (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovntdqa (%rdi), %zmm1
+; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: test_arg_v32i16:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpaddw (%rdi), %ymm0, %ymm0
-; AVX512VL-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1
+; AVX512VL-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX512VL-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX512VL-NEXT: vpaddw %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: retq
%1 = load <32 x i16>, <32 x i16>* %src, align 64, !nontemporal !1
%2 = add <32 x i16> %arg, %1
@@ -1071,51 +1345,70 @@ define <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) {
}
define <64 x i8> @test_arg_v64i8(<64 x i8> %arg, <64 x i8>* %src) {
-; SSE-LABEL: test_arg_v64i8:
-; SSE: # BB#0:
-; SSE-NEXT: paddb (%rdi), %xmm0
-; SSE-NEXT: paddb 16(%rdi), %xmm1
-; SSE-NEXT: paddb 32(%rdi), %xmm2
-; SSE-NEXT: paddb 48(%rdi), %xmm3
-; SSE-NEXT: retq
+; SSE2-LABEL: test_arg_v64i8:
+; SSE2: # BB#0:
+; SSE2-NEXT: paddb (%rdi), %xmm0
+; SSE2-NEXT: paddb 16(%rdi), %xmm1
+; SSE2-NEXT: paddb 32(%rdi), %xmm2
+; SSE2-NEXT: paddb 48(%rdi), %xmm3
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_arg_v64i8:
+; SSE41: # BB#0:
+; SSE41-NEXT: movntdqa 48(%rdi), %xmm4
+; SSE41-NEXT: movntdqa 32(%rdi), %xmm5
+; SSE41-NEXT: movntdqa 16(%rdi), %xmm6
+; SSE41-NEXT: movntdqa (%rdi), %xmm7
+; SSE41-NEXT: paddb %xmm7, %xmm0
+; SSE41-NEXT: paddb %xmm6, %xmm1
+; SSE41-NEXT: paddb %xmm5, %xmm2
+; SSE41-NEXT: paddb %xmm4, %xmm3
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test_arg_v64i8:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovdqa (%rdi), %ymm2
-; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpaddb %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vmovntdqa (%rdi), %xmm4
+; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpaddb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_arg_v64i8:
; AVX2: # BB#0:
-; AVX2-NEXT: vpaddb (%rdi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX2-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; AVX2-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_arg_v64i8:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vpaddb (%rdi), %ymm0, %ymm0
-; AVX512F-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1
+; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX512F-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX512F-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: test_arg_v64i8:
; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpaddb (%rdi), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovntdqa (%rdi), %zmm1
+; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: test_arg_v64i8:
; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpaddb (%rdi), %ymm0, %ymm0
-; AVX512VL-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1
+; AVX512VL-NEXT: vmovntdqa 32(%rdi), %ymm2
+; AVX512VL-NEXT: vmovntdqa (%rdi), %ymm3
+; AVX512VL-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: retq
%1 = load <64 x i8>, <64 x i8>* %src, align 64, !nontemporal !1
%2 = add <64 x i8> %arg, %1
diff --git a/test/CodeGen/X86/pr32659.ll b/test/CodeGen/X86/pr32659.ll
new file mode 100644
index 000000000000..aafae9c4f6c9
--- /dev/null
+++ b/test/CodeGen/X86/pr32659.ll
@@ -0,0 +1,83 @@
+; RUN: llc -o - %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+@a = external global i32, align 4
+@d = external global i32*, align 4
+@k = external global i32**, align 4
+@j = external global i32***, align 4
+@h = external global i32, align 4
+@c = external global i32, align 4
+@i = external global i32, align 4
+@b = external global i32, align 4
+@f = external global i64, align 8
+@e = external global i64, align 8
+@g = external global i32, align 4
+
+; Function Attrs: optsize readnone
+declare i32 @fn1(i32 returned) #0
+
+
+; CHECK-LABEL: fn2
+; CHECK: calll putchar
+; CHECK: addl $1,
+; CHECK: adcl $0,
+; Function Attrs: optsize
+define void @fn2() #1 {
+entry:
+ %putchar = tail call i32 @putchar(i32 48)
+ %0 = load volatile i32, i32* @h, align 4
+ %1 = load i32, i32* @c, align 4, !tbaa !2
+ %2 = load i32***, i32**** @j, align 4
+ %3 = load i32**, i32*** %2, align 4
+ %4 = load i32*, i32** %3, align 4
+ %5 = load i32, i32* %4, align 4
+ %cmp = icmp sgt i32 %1, %5
+ %conv = zext i1 %cmp to i32
+ %6 = load i32, i32* @i, align 4
+ %cmp1 = icmp sgt i32 %6, %conv
+ %conv2 = zext i1 %cmp1 to i32
+ store i32 %conv2, i32* @b, align 4
+ %cmp3 = icmp sgt i32 %0, %conv2
+ %conv4 = zext i1 %cmp3 to i32
+ %7 = load i32, i32* @a, align 4
+ %or = xor i32 %7, %conv4
+ store i32 %or, i32* @a, align 4
+ %8 = load i32*, i32** @d, align 4
+ %9 = load i32, i32* %8, align 4
+ %conv6 = sext i32 %9 to i64
+ %10 = load i64, i64* @e, align 8
+ %and = and i64 %10, %conv6
+ store i64 %and, i64* @e, align 8
+ %11 = load i32, i32* @g, align 4
+ %dec = add nsw i32 %11, -1
+ store i32 %dec, i32* @g, align 4
+ %12 = load i64, i64* @f, align 8
+ %inc = add nsw i64 %12, 1
+ store i64 %inc, i64* @f, align 8
+ ret void
+}
+
+; Function Attrs: optsize
+declare i32 @main() #1
+
+; Function Attrs: nounwind
+declare i32 @putchar(i32) #2
+
+attributes #0 = { optsize readnone }
+attributes #1 = { optsize }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"NumRegisterParameters", i32 0}
+!1 = !{!"clang version 5.0.0 (trunk 300074) (llvm/trunk 300078)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"any pointer", !4, i64 0}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"long long", !4, i64 0}
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 7c2937936313..0e8db74fe1bd 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -314,13 +314,13 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
; GENERIC-NEXT: jmp LBB7_6
; GENERIC-NEXT: LBB7_4:
; GENERIC-NEXT: movd %r9d, %xmm1
-; GENERIC-NEXT: movd %ecx, %xmm2
+; GENERIC-NEXT: movd %r8d, %xmm2
; GENERIC-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; GENERIC-NEXT: movd %r8d, %xmm3
+; GENERIC-NEXT: movd %ecx, %xmm3
; GENERIC-NEXT: movd %edx, %xmm1
; GENERIC-NEXT: LBB7_6:
; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; GENERIC-NEXT: psubd {{.*}}(%rip), %xmm1
; GENERIC-NEXT: psubd {{.*}}(%rip), %xmm0
; GENERIC-NEXT: movq %xmm0, 16(%rsi)
@@ -350,16 +350,16 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ATOM-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
-; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; ATOM-NEXT: jmp LBB7_6
; ATOM-NEXT: LBB7_4:
; ATOM-NEXT: movd %r9d, %xmm1
-; ATOM-NEXT: movd %ecx, %xmm2
+; ATOM-NEXT: movd %r8d, %xmm2
; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; ATOM-NEXT: movd %r8d, %xmm3
+; ATOM-NEXT: movd %ecx, %xmm3
; ATOM-NEXT: movd %edx, %xmm1
; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; ATOM-NEXT: LBB7_6:
; ATOM-NEXT: psubd {{.*}}(%rip), %xmm0
; ATOM-NEXT: psubd {{.*}}(%rip), %xmm1
diff --git a/test/CodeGen/X86/selectiondag-dominator.ll b/test/CodeGen/X86/selectiondag-dominator.ll
new file mode 100644
index 000000000000..f289a16f29eb
--- /dev/null
+++ b/test/CodeGen/X86/selectiondag-dominator.ll
@@ -0,0 +1,30 @@
+; Make sure we don't crash because we have a stale dominator tree.
+; PR33266
+; REQUIRES: asserts
+; RUN: llc -o /dev/null -verify-dom-info %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@global = external global [8 x [8 x [4 x i8]]], align 2
+@global.1 = external global { i8, [3 x i8] }, align 4
+
+define void @patatino() local_unnamed_addr {
+bb:
+ br label %bb1
+
+bb1:
+ br label %bb2
+
+bb2:
+ br i1 icmp ne (i8* getelementptr inbounds ({ i8, [3 x i8] }, { i8, [3 x i8] }* @global.1, i64 0, i32 0), i8* getelementptr inbounds ([8 x [8 x [4 x i8]]], [8 x [8 x [4 x i8]]]* @global, i64 0, i64 6, i64 6, i64 2)), label %bb4, label %bb3
+
+bb3:
+ br i1 icmp eq (i64 ashr (i64 shl (i64 zext (i32 srem (i32 7, i32 zext (i1 icmp eq (i8* getelementptr inbounds ({ i8, [3 x i8] }, { i8, [3 x i8] }* @global.1, i64 0, i32 0), i8* getelementptr inbounds ([8 x [8 x [4 x i8]]], [8 x [8 x [4 x i8]]]* @global, i64 0, i64 6, i64 6, i64 2)) to i32)) to i64), i64 56), i64 56), i64 0), label %bb5, label %bb4
+
+bb4:
+ %tmp = phi i64 [ ashr (i64 shl (i64 zext (i32 srem (i32 7, i32 zext (i1 icmp eq (i8* getelementptr inbounds ({ i8, [3 x i8] }, { i8, [3 x i8] }* @global.1, i64 0, i32 0), i8* getelementptr inbounds ([8 x [8 x [4 x i8]]], [8 x [8 x [4 x i8]]]* @global, i64 0, i64 6, i64 6, i64 2)) to i32)) to i64), i64 56), i64 56), %bb3 ], [ 7, %bb2 ]
+ ret void
+
+bb5:
+ ret void
+}
diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
index 0b03dffe99b5..e468c69db5dd 100644
--- a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -53,17 +53,17 @@ define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-NEXT: movl %esi, (%esp)
; X32-NEXT: andl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: leal -4(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebp
@@ -86,18 +86,18 @@ define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X64-NEXT: shrq $32, %rsi
; X64-NEXT: shrq $32, %rdi
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: andl %r8d, %edi
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: andl %eax, %esi
; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%arg0 = bitcast <4 x float> %a0 to <4 x i32>
%arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -121,15 +121,15 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT: notl %edx
-; X32-NEXT: notl %ecx
; X32-NEXT: notl %esi
+; X32-NEXT: notl %ecx
; X32-NEXT: notl %eax
; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl %eax, (%esp)
-; X32-NEXT: andl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: andl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X32-NEXT: andl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -138,7 +138,7 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: leal -4(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebp
@@ -165,18 +165,18 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind
; X64-NEXT: notl %esi
; X64-NEXT: notl %edx
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: andl %r8d, %edx
; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: andl %edi, %esi
; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%arg0 = bitcast <4 x float> %a0 to <4 x i32>
%arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -1277,17 +1277,17 @@ define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-NEXT: movl %esi, (%esp)
; X32-NEXT: orl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: leal -4(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebp
@@ -1310,18 +1310,18 @@ define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X64-NEXT: shrq $32, %rsi
; X64-NEXT: shrq $32, %rdi
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: orl %r8d, %edi
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: orl %eax, %esi
; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%arg0 = bitcast <4 x float> %a0 to <4 x i32>
%arg1 = bitcast <4 x float> %a1 to <4 x i32>
@@ -1538,16 +1538,16 @@ define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) n
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_ps:
; X64: # BB#0:
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X64-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; X64-NEXT: movaps %xmm3, %xmm0
; X64-NEXT: retq
%res0 = insertelement <4 x float> undef, float %a3, i32 0
@@ -1677,16 +1677,16 @@ define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3)
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_ps:
; X64: # BB#0:
-; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
%res0 = insertelement <4 x float> undef, float %a0, i32 0
%res1 = insertelement <4 x float> %res0, float %a1, i32 1
@@ -2239,17 +2239,17 @@ define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-NEXT: movl %esi, (%esp)
; X32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: leal -4(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebp
@@ -2272,18 +2272,18 @@ define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X64-NEXT: shrq $32, %rsi
; X64-NEXT: shrq $32, %rdi
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: xorl %r8d, %edi
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: xorl %eax, %esi
; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%arg0 = bitcast <4 x float> %a0 to <4 x i32>
%arg1 = bitcast <4 x float> %a1 to <4 x i32>
diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll
index f4964b5a6f66..c74dec3e21b6 100644
--- a/test/CodeGen/X86/sse1.ll
+++ b/test/CodeGen/X86/sse1.ll
@@ -87,17 +87,17 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: .LBB1_11: # %entry
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X32-NEXT: retl
;
; X64-LABEL: vselect:
; X64: # BB#0: # %entry
-; X64-NEXT: testl %ecx, %ecx
+; X64-NEXT: testl %edx, %edx
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: je .LBB1_1
; X64-NEXT: # BB#2: # %entry
; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: testl %edx, %edx
+; X64-NEXT: testl %ecx, %ecx
; X64-NEXT: jne .LBB1_5
; X64-NEXT: .LBB1_4:
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
@@ -111,7 +111,7 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
; X64-NEXT: jmp .LBB1_11
; X64-NEXT: .LBB1_1:
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT: testl %edx, %edx
+; X64-NEXT: testl %ecx, %ecx
; X64-NEXT: je .LBB1_4
; X64-NEXT: .LBB1_5: # %entry
; X64-NEXT: xorps %xmm2, %xmm2
@@ -126,7 +126,7 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: .LBB1_11: # %entry
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
entry:
%a1 = icmp eq <4 x i32> %q, zeroinitializer
@@ -252,12 +252,12 @@ define <2 x float> @PR31672() #0 {
; X32-NEXT: movl %eax, (%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: andl %ecx, %edx
-; X32-NEXT: notl %ecx
-; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: orl %edx, %ecx
-; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X32-NEXT: andl %eax, %ecx
+; X32-NEXT: notl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: andl %ecx, %edx
@@ -277,7 +277,7 @@ define <2 x float> @PR31672() #0 {
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
@@ -297,48 +297,48 @@ define <2 x float> @PR31672() #0 {
; X64-NEXT: mulps %xmm1, %xmm0
; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi
-; X64-NEXT: movl %r9d, %esi
-; X64-NEXT: andl %edi, %esi
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: andl %edi, %eax
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: notl %ecx
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
-; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; X64-NEXT: andl %eax, %ecx
-; X64-NEXT: orl %esi, %ecx
+; X64-NEXT: andl %edx, %ecx
+; X64-NEXT: orl %eax, %ecx
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movl %r8d, %ecx
-; X64-NEXT: andl %r10d, %ecx
-; X64-NEXT: movl %r10d, %esi
-; X64-NEXT: notl %esi
-; X64-NEXT: andl %edx, %esi
-; X64-NEXT: orl %ecx, %esi
-; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
-; X64-NEXT: shrq $32, %r9
+; X64-NEXT: shrq $32, %rsi
; X64-NEXT: shrq $32, %rdi
-; X64-NEXT: andl %edi, %r9d
+; X64-NEXT: andl %edi, %esi
; X64-NEXT: notl %edi
-; X64-NEXT: shrq $32, %rax
-; X64-NEXT: andl %edi, %eax
-; X64-NEXT: orl %r9d, %eax
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; X64-NEXT: shrq $32, %r8
-; X64-NEXT: shrq $32, %r10
-; X64-NEXT: andl %r10d, %r8d
-; X64-NEXT: notl %r10d
; X64-NEXT: shrq $32, %rdx
-; X64-NEXT: andl %r10d, %edx
-; X64-NEXT: orl %r8d, %edx
+; X64-NEXT: andl %edi, %edx
+; X64-NEXT: orl %esi, %edx
; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movl %r8d, %eax
+; X64-NEXT: andl %r9d, %eax
+; X64-NEXT: movl %r9d, %ecx
+; X64-NEXT: notl %ecx
+; X64-NEXT: andl %r10d, %ecx
+; X64-NEXT: orl %eax, %ecx
+; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
+; X64-NEXT: shrq $32, %r8
+; X64-NEXT: shrq $32, %r9
+; X64-NEXT: andl %r9d, %r8d
+; X64-NEXT: notl %r9d
+; X64-NEXT: shrq $32, %r10
+; X64-NEXT: andl %r9d, %r10d
+; X64-NEXT: orl %r8d, %r10d
+; X64-NEXT: movl %r10d, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%t0 = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> <float 42.0, float 3.0>)
ret <2 x float> %t0
diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 20387ccd6b7a..ff5d624e6042 100644
--- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -2076,7 +2076,7 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm2
; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -2087,8 +2087,8 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm1
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -2099,7 +2099,7 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm3
; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -2110,27 +2110,27 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi8:
; X64: # BB#0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT: movzbl %r8b, %eax
+; X64-NEXT: movzbl %dl, %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; X64-NEXT: movzbl %dl, %eax
+; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X64-NEXT: movzbl %r8b, %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: movzbl %r9b, %eax
; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
@@ -2138,20 +2138,20 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: movzbl %r9b, %eax
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm2
@@ -2161,9 +2161,9 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%res0 = insertelement <16 x i8> undef, i8 %a15, i32 0
%res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1
@@ -2206,11 +2206,11 @@ define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4,
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
-; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
-; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi16:
@@ -2218,20 +2218,20 @@ define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4,
; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
; X64-NEXT: movd %edi, %xmm0
-; X64-NEXT: movd %r8d, %xmm1
+; X64-NEXT: movd %esi, %xmm1
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: movd %edx, %xmm0
-; X64-NEXT: movd %eax, %xmm2
+; X64-NEXT: movd %ecx, %xmm2
; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X64-NEXT: movd %esi, %xmm0
+; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-NEXT: movd %r8d, %xmm0
; X64-NEXT: movd %r9d, %xmm1
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT: movd %ecx, %xmm3
+; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: movd %r10d, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
%res0 = insertelement <8 x i16> undef, i16 %a7, i32 0
%res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1
@@ -2254,18 +2254,18 @@ define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi32:
; X64: # BB#0:
; X64-NEXT: movd %edi, %xmm0
-; X64-NEXT: movd %edx, %xmm1
+; X64-NEXT: movd %esi, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT: movd %esi, %xmm2
+; X64-NEXT: movd %edx, %xmm2
; X64-NEXT: movd %ecx, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%res0 = insertelement <4 x i32> undef, i32 %a3, i32 0
%res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1
@@ -2282,11 +2282,11 @@ define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind {
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_epi64x:
@@ -2441,10 +2441,9 @@ define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind {
; X32-LABEL: test_mm_set1_epi64x:
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set1_epi64x:
@@ -2486,7 +2485,7 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm2
; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -2497,8 +2496,8 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm1
; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -2509,7 +2508,7 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm3
; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
@@ -2520,9 +2519,9 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi8:
@@ -2534,46 +2533,46 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl %cl, %eax
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl %r9b, %eax
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm1
; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: movzbl %r9b, %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl %dl, %eax
+; X64-NEXT: movzbl %r8b, %eax
; X64-NEXT: movd %eax, %xmm3
; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT: movzbl %cl, %eax
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movzbl %r8b, %eax
+; X64-NEXT: movzbl %dl, %eax
; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movd %eax, %xmm4
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0
%res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1
@@ -2616,11 +2615,11 @@ define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
-; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
-; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi16:
@@ -2628,20 +2627,20 @@ define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4
; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax
; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w
; X64-NEXT: movd %eax, %xmm0
-; X64-NEXT: movd %ecx, %xmm1
+; X64-NEXT: movd %r10d, %xmm1
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: movd %r9d, %xmm0
-; X64-NEXT: movd %esi, %xmm2
+; X64-NEXT: movd %r8d, %xmm2
; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; X64-NEXT: movd %r10d, %xmm0
+; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-NEXT: movd %ecx, %xmm0
; X64-NEXT: movd %edx, %xmm1
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT: movd %r8d, %xmm3
+; X64-NEXT: movd %esi, %xmm3
; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; X64-NEXT: retq
%res0 = insertelement <8 x i16> undef, i16 %a0, i32 0
%res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1
@@ -2664,18 +2663,18 @@ define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwin
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi32:
; X64: # BB#0:
; X64-NEXT: movd %ecx, %xmm0
-; X64-NEXT: movd %esi, %xmm1
+; X64-NEXT: movd %edx, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT: movd %edx, %xmm2
+; X64-NEXT: movd %esi, %xmm2
; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
%res0 = insertelement <4 x i32> undef, i32 %a0, i32 0
%res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1
@@ -2692,11 +2691,11 @@ define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind {
; X32: # BB#0:
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_epi64x:
diff --git a/test/CodeGen/X86/sse3-avx-addsub-2.ll b/test/CodeGen/X86/sse3-avx-addsub-2.ll
index 4d895ea264c5..b5aa26f532ef 100644
--- a/test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ b/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -342,9 +342,8 @@ define <4 x float> @test14(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: subss %xmm1, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1,1,3]
-; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test14:
@@ -375,8 +374,7 @@ define <4 x float> @test15(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT: addss %xmm0, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0,2,1]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[0,0]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
@@ -417,10 +415,10 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT: addss %xmm0, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test16:
diff --git a/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
index 19305d0dad62..383ab21bd404 100644
--- a/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
@@ -354,8 +354,9 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_mm_crc32_u8(i32 %a0, i8 %a1) {
; X32-LABEL: test_mm_crc32_u8:
; X32: # BB#0:
+; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: crc32b {{[0-9]+}}(%esp), %eax
+; X32-NEXT: crc32b %cl, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_crc32_u8:
@@ -371,8 +372,9 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
define i32 @test_mm_crc32_u16(i32 %a0, i16 %a1) {
; X32-LABEL: test_mm_crc32_u16:
; X32: # BB#0:
+; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: crc32w {{[0-9]+}}(%esp), %eax
+; X32-NEXT: crc32w %cx, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_crc32_u16:
diff --git a/test/CodeGen/X86/stack-folding-fp-avx1.ll b/test/CodeGen/X86/stack-folding-fp-avx1.ll
index f937d484ce0d..4165aea8794f 100644
--- a/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -1651,9 +1651,26 @@ define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
}
declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
-; TODO stack_fold_sqrtsd
+define double @stack_fold_sqrtsd(double %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtsd
+ ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call double @llvm.sqrt.f64(double %a0)
+ ret double %2
+}
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
; TODO stack_fold_sqrtsd_int
-; TODO stack_fold_sqrtss
+
+define float @stack_fold_sqrtss(float %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtss
+ ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call float @llvm.sqrt.f32(float %a0)
+ ret float %2
+}
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
; TODO stack_fold_sqrtss_int
define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
diff --git a/test/CodeGen/X86/stack-folding-int-sse42.ll b/test/CodeGen/X86/stack-folding-int-sse42.ll
index 5c6f697610a0..3ca94b7b9467 100644
--- a/test/CodeGen/X86/stack-folding-int-sse42.ll
+++ b/test/CodeGen/X86/stack-folding-int-sse42.ll
@@ -453,6 +453,21 @@ declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwin
; TODO stack_fold_pextrb
+; We can't naively fold pextrw as it only writes to a 16-bit memory location
+; even though it can store to a 32-bit register.
+define i16 @stack_fold_pextrw(<8 x i16> %a0) {
+; CHECK-LABEL: stack_fold_pextrw
+; CHECK: pextrw $1, {{%xmm[0-9][0-9]*}}, %[[GPR32:(e[a-z]+|r[0-9]+d)]]
+; CHECK: movl %[[GPR32]], {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Spill
+; CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
+entry:
+; add forces execution domain
+ %add = add <8 x i16> %a0, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
+ %extract = extractelement <8 x i16> %add, i32 1
+ %asm = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i16 %extract
+}
+
define i32 @stack_fold_pextrd(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_pextrd
;CHECK: pextrd $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
@@ -473,8 +488,6 @@ define i64 @stack_fold_pextrq(<2 x i64> %a0) {
ret i64 %1
}
-; TODO stack_fold_pextrw
-
define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) {
;CHECK-LABEL: stack_fold_phaddd
;CHECK: phaddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll
index 3c99928824bc..8e253f11e93e 100644
--- a/test/CodeGen/X86/trunc-to-bool.ll
+++ b/test/CodeGen/X86/trunc-to-bool.ll
@@ -1,16 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; An integer truncation to i1 should be done with an and instruction to make
; sure only the LSBit survives. Test that this is the case both for a returned
; value and as the operand of a branch.
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s
define zeroext i1 @test1(i32 %X) nounwind {
+; CHECK-LABEL: test1:
+; CHECK: # BB#0:
+; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: retl
%Y = trunc i32 %X to i1
ret i1 %Y
}
-; CHECK-LABEL: test1:
-; CHECK: andb $1, %al
define i1 @test2(i32 %val, i32 %mask) nounwind {
+; CHECK-LABEL: test2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: btl %ecx, %eax
+; CHECK-NEXT: jae .LBB1_2
+; CHECK-NEXT: # BB#1: # %ret_true
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB1_2: # %ret_false
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retl
entry:
%shifted = ashr i32 %val, %mask
%anded = and i32 %shifted, 1
@@ -21,10 +37,19 @@ ret_true:
ret_false:
ret i1 false
}
-; CHECK-LABEL: test2:
-; CHECK: btl
define i32 @test3(i8* %ptr) nounwind {
+; CHECK-LABEL: test3:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: testb $1, (%eax)
+; CHECK-NEXT: je .LBB2_2
+; CHECK-NEXT: # BB#1: # %cond_true
+; CHECK-NEXT: movl $21, %eax
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB2_2: # %cond_false
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: retl
%val = load i8, i8* %ptr
%tmp = trunc i8 %val to i1
br i1 %tmp, label %cond_true, label %cond_false
@@ -33,10 +58,18 @@ cond_true:
cond_false:
ret i32 42
}
-; CHECK-LABEL: test3:
-; CHECK: testb $1, (%eax)
define i32 @test4(i8* %ptr) nounwind {
+; CHECK-LABEL: test4:
+; CHECK: # BB#0:
+; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB3_2
+; CHECK-NEXT: # BB#1: # %cond_true
+; CHECK-NEXT: movl $21, %eax
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB3_2: # %cond_false
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: retl
%tmp = ptrtoint i8* %ptr to i1
br i1 %tmp, label %cond_true, label %cond_false
cond_true:
@@ -44,10 +77,29 @@ cond_true:
cond_false:
ret i32 42
}
-; CHECK-LABEL: test4:
-; CHECK: testb $1, 4(%esp)
define i32 @test5(double %d) nounwind {
+; CHECK-LABEL: test5:
+; CHECK: # BB#0:
+; CHECK-NEXT: pushl %eax
+; CHECK-NEXT: fldl {{[0-9]+}}(%esp)
+; CHECK-NEXT: fnstcw (%esp)
+; CHECK-NEXT: movzwl (%esp), %eax
+; CHECK-NEXT: movw $3199, (%esp) # imm = 0xC7F
+; CHECK-NEXT: fldcw (%esp)
+; CHECK-NEXT: movw %ax, (%esp)
+; CHECK-NEXT: fistps {{[0-9]+}}(%esp)
+; CHECK-NEXT: fldcw (%esp)
+; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB4_2
+; CHECK-NEXT: # BB#1: # %cond_true
+; CHECK-NEXT: movl $21, %eax
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB4_2: # %cond_false
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: retl
%tmp = fptosi double %d to i1
br i1 %tmp, label %cond_true, label %cond_false
cond_true:
@@ -55,5 +107,3 @@ cond_true:
cond_false:
ret i32 42
}
-; CHECK-LABEL: test5:
-; CHECK: testb $1
diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll
index 477150016486..6cfe41ac503d 100644
--- a/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/test/CodeGen/X86/vec_fp_to_int.ll
@@ -1320,17 +1320,17 @@ define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -1560,33 +1560,33 @@ define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm3
-; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
+; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: movaps %xmm1, %xmm3
-; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: retq
;
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index a42b3c96c3ae..7cb1c95cb01a 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -1169,16 +1169,16 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-LABEL: sitofp_4i64_to_4f32_undef:
; SSE: # BB#0:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -1368,21 +1368,22 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-LABEL: sitofp_4i64_to_4f32:
; SSE: # BB#0:
; SSE-NEXT: movq %xmm1, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_4i64_to_4f32:
@@ -1838,21 +1839,14 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-LABEL: uitofp_4i64_to_4f32_undef:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: testq %rax, %rax
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: js .LBB41_2
-; SSE-NEXT: # BB#1:
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: .LBB41_2:
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
-; SSE-NEXT: js .LBB41_3
-; SSE-NEXT: # BB#4:
+; SSE-NEXT: js .LBB41_1
+; SSE-NEXT: # BB#2:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: jmp .LBB41_5
-; SSE-NEXT: .LBB41_3:
+; SSE-NEXT: jmp .LBB41_3
+; SSE-NEXT: .LBB41_1:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
@@ -1860,17 +1854,16 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: addss %xmm0, %xmm0
-; SSE-NEXT: .LBB41_5:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: .LBB41_3:
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
-; SSE-NEXT: js .LBB41_6
-; SSE-NEXT: # BB#7:
+; SSE-NEXT: js .LBB41_4
+; SSE-NEXT: # BB#5:
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: jmp .LBB41_8
-; SSE-NEXT: .LBB41_6:
+; SSE-NEXT: jmp .LBB41_6
+; SSE-NEXT: .LBB41_4:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
@@ -1878,9 +1871,16 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: addss %xmm1, %xmm1
-; SSE-NEXT: .LBB41_8:
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: .LBB41_6:
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: testq %rax, %rax
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: js .LBB41_8
+; SSE-NEXT: # BB#7:
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: .LBB41_8:
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE-NEXT: retq
;
; VEX-LABEL: uitofp_4i64_to_4f32_undef:
@@ -2149,32 +2149,32 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB47_1
; SSE-NEXT: # BB#2:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: jmp .LBB47_3
; SSE-NEXT: .LBB47_1:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: addss %xmm3, %xmm3
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm2
; SSE-NEXT: .LBB47_3:
-; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB47_4
; SSE-NEXT: # BB#5:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
; SSE-NEXT: jmp .LBB47_6
; SSE-NEXT: .LBB47_4:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: addss %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: addss %xmm3, %xmm3
; SSE-NEXT: .LBB47_6:
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB47_7
; SSE-NEXT: # BB#8:
@@ -2208,9 +2208,9 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: .LBB47_12:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE-NEXT: movaps %xmm2, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_4i64_to_4f32:
@@ -3381,22 +3381,23 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) {
; SSE-LABEL: sitofp_load_4i64_to_4f32:
; SSE: # BB#0:
; SSE-NEXT: movdqa (%rdi), %xmm1
-; SSE-NEXT: movdqa 16(%rdi), %xmm2
-; SSE-NEXT: movq %xmm2, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: movdqa 16(%rdi), %xmm0
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE-NEXT: movq %xmm2, %rax
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_load_4i64_to_4f32:
@@ -3546,41 +3547,42 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE-LABEL: sitofp_load_8i64_to_8f32:
; SSE: # BB#0:
; SSE-NEXT: movdqa (%rdi), %xmm1
-; SSE-NEXT: movdqa 16(%rdi), %xmm2
-; SSE-NEXT: movdqa 32(%rdi), %xmm3
-; SSE-NEXT: movdqa 48(%rdi), %xmm4
-; SSE-NEXT: movq %xmm2, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm5
+; SSE-NEXT: movdqa 16(%rdi), %xmm0
+; SSE-NEXT: movdqa 32(%rdi), %xmm2
+; SSE-NEXT: movdqa 48(%rdi), %xmm3
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: cvtsi2ssq %rax, %xmm4
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE-NEXT: movq %xmm2, %rax
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: movq %xmm4, %rax
-; SSE-NEXT: xorps %xmm2, %xmm2
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: xorps %xmm4, %xmm4
+; SSE-NEXT: cvtsi2ssq %rax, %xmm4
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
+; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; SSE-NEXT: movq %xmm2, %rax
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: xorps %xmm2, %xmm2
; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE-NEXT: movq %xmm3, %rax
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: sitofp_load_8i64_to_8f32:
@@ -3822,73 +3824,73 @@ define <8 x float> @sitofp_load_8i8_to_8f32(<8 x i8> *%a) {
define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
; SSE-LABEL: uitofp_load_4i64_to_4f32:
; SSE: # BB#0:
-; SSE-NEXT: movdqa (%rdi), %xmm1
-; SSE-NEXT: movdqa 16(%rdi), %xmm3
-; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: movdqa (%rdi), %xmm2
+; SSE-NEXT: movdqa 16(%rdi), %xmm0
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB76_1
; SSE-NEXT: # BB#2:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: jmp .LBB76_3
; SSE-NEXT: .LBB76_1:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm2
-; SSE-NEXT: addss %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: addss %xmm1, %xmm1
; SSE-NEXT: .LBB76_3:
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB76_4
; SSE-NEXT: # BB#5:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
; SSE-NEXT: jmp .LBB76_6
; SSE-NEXT: .LBB76_4:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: addss %xmm3, %xmm3
; SSE-NEXT: .LBB76_6:
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB76_7
; SSE-NEXT: # BB#8:
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: jmp .LBB76_9
; SSE-NEXT: .LBB76_7:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: addss %xmm3, %xmm3
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: .LBB76_9:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB76_10
; SSE-NEXT: # BB#11:
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: xorps %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: jmp .LBB76_12
; SSE-NEXT: .LBB76_10:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: addss %xmm1, %xmm1
+; SSE-NEXT: xorps %xmm2, %xmm2
+; SSE-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm2
; SSE-NEXT: .LBB76_12:
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_load_4i64_to_4f32:
@@ -4186,121 +4188,121 @@ define <4 x float> @uitofp_load_4i8_to_4f32(<4 x i8> *%a) {
define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE-LABEL: uitofp_load_8i64_to_8f32:
; SSE: # BB#0:
-; SSE-NEXT: movdqa (%rdi), %xmm1
-; SSE-NEXT: movdqa 16(%rdi), %xmm5
+; SSE-NEXT: movdqa (%rdi), %xmm5
+; SSE-NEXT: movdqa 16(%rdi), %xmm0
; SSE-NEXT: movdqa 32(%rdi), %xmm2
-; SSE-NEXT: movdqa 48(%rdi), %xmm3
-; SSE-NEXT: movq %xmm5, %rax
+; SSE-NEXT: movdqa 48(%rdi), %xmm1
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_1
; SSE-NEXT: # BB#2:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm4
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
; SSE-NEXT: jmp .LBB80_3
; SSE-NEXT: .LBB80_1:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm4
-; SSE-NEXT: addss %xmm4, %xmm4
+; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: addss %xmm3, %xmm3
; SSE-NEXT: .LBB80_3:
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT: movq %xmm0, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_4
; SSE-NEXT: # BB#5:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm4
; SSE-NEXT: jmp .LBB80_6
; SSE-NEXT: .LBB80_4:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm4
+; SSE-NEXT: addss %xmm4, %xmm4
; SSE-NEXT: .LBB80_6:
-; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
; SSE-NEXT: movq %xmm5, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_7
; SSE-NEXT: # BB#8:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm6
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
; SSE-NEXT: jmp .LBB80_9
; SSE-NEXT: .LBB80_7:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm6
-; SSE-NEXT: addss %xmm6, %xmm6
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: addss %xmm0, %xmm0
; SSE-NEXT: .LBB80_9:
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE-NEXT: movq %xmm1, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
+; SSE-NEXT: movq %xmm5, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_10
; SSE-NEXT: # BB#11:
-; SSE-NEXT: xorps %xmm5, %xmm5
-; SSE-NEXT: cvtsi2ssq %rax, %xmm5
+; SSE-NEXT: cvtsi2ssq %rax, %xmm6
; SSE-NEXT: jmp .LBB80_12
; SSE-NEXT: .LBB80_10:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm5, %xmm5
-; SSE-NEXT: cvtsi2ssq %rax, %xmm5
-; SSE-NEXT: addss %xmm5, %xmm5
+; SSE-NEXT: cvtsi2ssq %rax, %xmm6
+; SSE-NEXT: addss %xmm6, %xmm6
; SSE-NEXT: .LBB80_12:
-; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_13
; SSE-NEXT: # BB#14:
-; SSE-NEXT: cvtsi2ssq %rax, %xmm7
+; SSE-NEXT: xorps %xmm5, %xmm5
+; SSE-NEXT: cvtsi2ssq %rax, %xmm5
; SSE-NEXT: jmp .LBB80_15
; SSE-NEXT: .LBB80_13:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: cvtsi2ssq %rax, %xmm7
-; SSE-NEXT: addss %xmm7, %xmm7
+; SSE-NEXT: xorps %xmm5, %xmm5
+; SSE-NEXT: cvtsi2ssq %rax, %xmm5
+; SSE-NEXT: addss %xmm5, %xmm5
; SSE-NEXT: .LBB80_15:
-; SSE-NEXT: movq %xmm2, %rax
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT: movq %xmm1, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_16
; SSE-NEXT: # BB#17:
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm7
; SSE-NEXT: jmp .LBB80_18
; SSE-NEXT: .LBB80_16:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: cvtsi2ssq %rax, %xmm1
-; SSE-NEXT: addss %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm7
+; SSE-NEXT: addss %xmm7, %xmm7
; SSE-NEXT: .LBB80_18:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
-; SSE-NEXT: movq %xmm3, %rax
+; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
+; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: testq %rax, %rax
; SSE-NEXT: js .LBB80_19
; SSE-NEXT: # BB#20:
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
; SSE-NEXT: jmp .LBB80_21
; SSE-NEXT: .LBB80_19:
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shrq %rcx
; SSE-NEXT: andl $1, %eax
; SSE-NEXT: orq %rcx, %rax
-; SSE-NEXT: xorps %xmm3, %xmm3
-; SSE-NEXT: cvtsi2ssq %rax, %xmm3
-; SSE-NEXT: addss %xmm3, %xmm3
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: addss %xmm1, %xmm1
; SSE-NEXT: .LBB80_21:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; SSE-NEXT: movq %xmm2, %rax
; SSE-NEXT: testq %rax, %rax
@@ -4318,8 +4320,8 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
; SSE-NEXT: cvtsi2ssq %rax, %xmm2
; SSE-NEXT: addss %xmm2, %xmm2
; SSE-NEXT: .LBB80_24:
-; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm5[0]
; SSE-NEXT: retq
;
; AVX1-LABEL: uitofp_load_8i64_to_8f32:
diff --git a/test/CodeGen/X86/vec_set.ll b/test/CodeGen/X86/vec_set.ll
index 6439a6dcb00b..918430efea1d 100644
--- a/test/CodeGen/X86/vec_set.ll
+++ b/test/CodeGen/X86/vec_set.ll
@@ -12,35 +12,35 @@ define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i1
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
-; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; X86-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; X86-NEXT: movdqa %xmm3, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: test:
; X64: # BB#0:
-; X64-NEXT: movd %r8d, %xmm0
+; X64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-NEXT: movd %edx, %xmm1
-; X64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT: movd %ecx, %xmm0
+; X64-NEXT: movd %r9d, %xmm0
; X64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X64-NEXT: movd %r9d, %xmm2
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: movd %r8d, %xmm1
+; X64-NEXT: movd %ecx, %xmm2
+; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X64-NEXT: movd %edx, %xmm1
; X64-NEXT: movd %esi, %xmm3
-; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; X64-NEXT: movdqa %xmm3, (%rdi)
; X64-NEXT: retq
%tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0
diff --git a/test/CodeGen/X86/vector-compare-results.ll b/test/CodeGen/X86/vector-compare-results.ll
index 4fa9596192a6..ce0b067f5043 100644
--- a/test/CodeGen/X86/vector-compare-results.ll
+++ b/test/CodeGen/X86/vector-compare-results.ll
@@ -5345,217 +5345,213 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
;
; AVX1-LABEL: test_cmp_v64i16:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm8
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm9
-; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8
-; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpacksswb %xmm8, %xmm0, %xmm8
-; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm8
+; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT: vpcmpgtw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpcmpgtw %xmm6, %xmm2, %xmm5
+; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm6, %xmm2, %xmm2
-; AVX1-NEXT: vpacksswb %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
-; AVX1-NEXT: vpcmpgtw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpcmpgtw %xmm7, %xmm3, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT: vpcmpgtw %xmm7, %xmm3, %xmm3
-; AVX1-NEXT: vpacksswb %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vpextrb $15, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $14, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $13, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $12, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $11, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $10, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $9, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $8, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $7, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $6, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $5, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $4, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $3, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $2, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $1, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $0, %xmm3, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $15, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $14, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $14, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $12, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $13, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $10, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $12, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $8, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $11, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $6, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $10, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $4, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $9, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $2, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $8, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $0, %xmm6, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $7, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $14, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $6, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $12, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $5, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $10, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $4, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $8, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $3, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $6, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $2, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $4, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $1, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $2, %xmm2, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
; AVX1-NEXT: vpextrb $0, %xmm2, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $14, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $12, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $10, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $8, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $6, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $4, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $2, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, 4(%rdi)
+; AVX1-NEXT: vpextrb $0, %xmm5, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, 4(%rdi)
-; AVX1-NEXT: vpextrb $15, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $14, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $13, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $12, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $11, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $10, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $9, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $8, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $7, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $6, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $5, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $4, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $3, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $2, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
-; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $1, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $0, %xmm1, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $15, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $14, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $14, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $12, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $13, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $10, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $12, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $8, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $11, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $6, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $10, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $4, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $9, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $2, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $8, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $0, %xmm4, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $7, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $14, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $6, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $12, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $5, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $10, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $4, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $8, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $3, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $6, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $2, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $4, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
-; AVX1-NEXT: vpextrb $1, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: vpextrb $2, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $14, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $12, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $10, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $8, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $6, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $4, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
+; AVX1-NEXT: movb %al, (%rdi)
+; AVX1-NEXT: vpextrb $2, %xmm8, %eax
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: vpextrb $0, %xmm8, %eax
-; AVX1-NEXT: andb $1, %al
+; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: movb %al, (%rdi)
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: vzeroupper
@@ -5565,207 +5561,203 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind {
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm0, %xmm0
; AVX2-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm1, %xmm1
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
; AVX2-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2
-; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm6
; AVX2-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3
-; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vpextrb $15, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm7
+; AVX2-NEXT: vpextrb $14, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $14, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $13, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $12, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $11, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $10, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $9, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $8, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $0, %xmm7, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $7, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $6, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $5, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $4, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $3, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $2, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $1, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm3, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
; AVX2-NEXT: vpextrb $0, %xmm3, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $15, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $14, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $13, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $12, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $11, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $10, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $9, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $8, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $0, %xmm6, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $7, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $6, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $5, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $4, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $3, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $2, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $1, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm2, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
; AVX2-NEXT: vpextrb $0, %xmm2, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, 4(%rdi)
-; AVX2-NEXT: vpextrb $15, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $14, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $13, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $12, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $11, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $10, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $9, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $8, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $0, %xmm5, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $7, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $6, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $5, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $4, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $3, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $2, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $1, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm1, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: vpextrb $0, %xmm1, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $15, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $14, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $13, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $12, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $11, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $10, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $9, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $8, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $0, %xmm4, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $7, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $14, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $6, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $12, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $5, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $10, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $4, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $8, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $3, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $6, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $2, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $4, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
-; AVX2-NEXT: vpextrb $1, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: vpextrb $2, %xmm0, %eax
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
-; AVX2-NEXT: andb $1, %al
+; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, (%rdi)
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: vzeroupper
diff --git a/test/CodeGen/X86/vector-rem.ll b/test/CodeGen/X86/vector-rem.ll
index 340dd77ec481..3e3e93a7d5b0 100644
--- a/test/CodeGen/X86/vector-rem.ll
+++ b/test/CodeGen/X86/vector-rem.ll
@@ -11,9 +11,9 @@ define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) nounwind {
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
; CHECK-NEXT: movd %edx, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; CHECK-NEXT: movd %xmm3, %eax
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; CHECK-NEXT: movd %xmm3, %ecx
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
@@ -24,15 +24,15 @@ define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) nounwind {
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
; CHECK-NEXT: movd %edx, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; CHECK-NEXT: movd %xmm0, %ecx
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
; CHECK-NEXT: movd %edx, %xmm0
; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%m = srem <4 x i32> %t, %u
@@ -49,9 +49,9 @@ define <4 x i32> @bar(<4 x i32> %t, <4 x i32> %u) nounwind {
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %ecx
; CHECK-NEXT: movd %edx, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
; CHECK-NEXT: movd %xmm3, %eax
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
; CHECK-NEXT: movd %xmm3, %ecx
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %ecx
@@ -62,15 +62,15 @@ define <4 x i32> @bar(<4 x i32> %t, <4 x i32> %u) nounwind {
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %ecx
; CHECK-NEXT: movd %edx, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; CHECK-NEXT: movd %xmm0, %ecx
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divl %ecx
; CHECK-NEXT: movd %edx, %xmm0
; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%m = urem <4 x i32> %t, %u
@@ -88,9 +88,9 @@ define <4 x float> @qux(<4 x float> %t, <4 x float> %u) nounwind {
; CHECK-NEXT: callq fmodf
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; CHECK-NEXT: callq fmodf
; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
@@ -100,15 +100,15 @@ define <4 x float> @qux(<4 x float> %t, <4 x float> %u) nounwind {
; CHECK-NEXT: callq fmodf
; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; CHECK-NEXT: callq fmodf
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: unpcklps (%rsp), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
%m = frem <4 x float> %t, %u
diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
index 53e471d6f175..392c0de95f24 100644
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -1333,19 +1333,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shlq $62, %rcx
+; SSE2-NEXT: shlq $61, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shlq $61, %rcx
+; SSE2-NEXT: shlq $62, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: shlq $63, %rax
; SSE2-NEXT: sarq $63, %rax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_4i1_to_4i32:
@@ -1356,19 +1356,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movq %rax, %rcx
-; SSSE3-NEXT: shlq $62, %rcx
+; SSSE3-NEXT: shlq $61, %rcx
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: movq %rax, %rcx
-; SSSE3-NEXT: shlq $61, %rcx
+; SSSE3-NEXT: shlq $62, %rcx
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: shlq $63, %rax
; SSSE3-NEXT: sarq $63, %rax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_sext_4i1_to_4i32:
@@ -1523,14 +1523,14 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; SSE2-NEXT: shrl $3, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl %ecx
+; SSE2-NEXT: shrl $2, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: shrl $2, %eax
+; SSE2-NEXT: shrl %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
; SSE2-NEXT: psllq $63, %xmm0
@@ -1549,14 +1549,14 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; SSSE3-NEXT: shrl $3, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl %ecx
+; SSSE3-NEXT: shrl $2, %ecx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: shrl $2, %eax
+; SSSE3-NEXT: shrl %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSSE3-NEXT: pand {{.*}}(%rip), %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
; SSSE3-NEXT: psllq $63, %xmm0
@@ -1813,7 +1813,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSE2-NEXT: shrq $7, %rcx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shlq $60, %rcx
+; SSE2-NEXT: shlq $57, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
@@ -1822,13 +1822,13 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shlq $62, %rcx
+; SSE2-NEXT: shlq $59, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shlq $57, %rcx
+; SSE2-NEXT: shlq $60, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movq %rax, %rcx
@@ -1837,15 +1837,15 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: movq %rax, %rcx
-; SSE2-NEXT: shlq $59, %rcx
+; SSE2-NEXT: shlq $62, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: movd %ecx, %xmm3
; SSE2-NEXT: shlq $63, %rax
; SSE2-NEXT: sarq $63, %rax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_8i1_to_8i16:
@@ -1855,7 +1855,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSSE3-NEXT: shrq $7, %rcx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movq %rax, %rcx
-; SSSE3-NEXT: shlq $60, %rcx
+; SSSE3-NEXT: shlq $57, %rcx
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
@@ -1864,13 +1864,13 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movq %rax, %rcx
-; SSSE3-NEXT: shlq $62, %rcx
+; SSSE3-NEXT: shlq $59, %rcx
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: movq %rax, %rcx
-; SSSE3-NEXT: shlq $57, %rcx
+; SSSE3-NEXT: shlq $60, %rcx
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movq %rax, %rcx
@@ -1879,15 +1879,15 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) {
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT: movq %rax, %rcx
-; SSSE3-NEXT: shlq $59, %rcx
+; SSSE3-NEXT: shlq $62, %rcx
; SSSE3-NEXT: sarq $63, %rcx
; SSSE3-NEXT: movd %ecx, %xmm3
; SSSE3-NEXT: shlq $63, %rax
; SSSE3-NEXT: sarq $63, %rax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_sext_8i1_to_8i16:
@@ -2191,7 +2191,7 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movzbl (%rdi), %eax
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $6, %ecx
+; SSE2-NEXT: shrl $3, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
@@ -2203,30 +2203,30 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $4, %ecx
+; SSE2-NEXT: shrl %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: movl %eax, %ecx
; SSE2-NEXT: shrl $5, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl %ecx
+; SSE2-NEXT: shrl $4, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $3, %ecx
+; SSE2-NEXT: shrl $6, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: shrl $7, %eax
; SSE2-NEXT: movzwl %ax, %eax
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pslld $31, %xmm0
@@ -2240,7 +2240,7 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movzbl (%rdi), %eax
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $6, %ecx
+; SSSE3-NEXT: shrl $3, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
@@ -2252,30 +2252,30 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $4, %ecx
+; SSSE3-NEXT: shrl %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: movl %eax, %ecx
; SSSE3-NEXT: shrl $5, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl %ecx
+; SSSE3-NEXT: shrl $4, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $3, %ecx
+; SSSE3-NEXT: shrl $6, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: shrl $7, %eax
; SSSE3-NEXT: movzwl %ax, %eax
; SSSE3-NEXT: movd %eax, %xmm3
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: pslld $31, %xmm0
@@ -2546,69 +2546,69 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
; SSE2-NEXT: movq %rax, %rsi
; SSE2-NEXT: movq %rax, %rdi
; SSE2-NEXT: movq %rax, %rbp
-; SSE2-NEXT: shlq $49, %rbp
-; SSE2-NEXT: sarq $63, %rbp
+; SSE2-NEXT: shrq $15, %rbp
; SSE2-NEXT: movd %ebp, %xmm0
; SSE2-NEXT: movq %rax, %rbp
; SSE2-NEXT: movsbq %al, %rax
-; SSE2-NEXT: shlq $57, %r8
+; SSE2-NEXT: shlq $49, %r8
; SSE2-NEXT: sarq $63, %r8
; SSE2-NEXT: movd %r8d, %xmm1
-; SSE2-NEXT: shlq $53, %r9
+; SSE2-NEXT: shlq $50, %r9
; SSE2-NEXT: sarq $63, %r9
; SSE2-NEXT: movd %r9d, %xmm2
-; SSE2-NEXT: shlq $61, %r10
+; SSE2-NEXT: shlq $51, %r10
; SSE2-NEXT: sarq $63, %r10
; SSE2-NEXT: movd %r10d, %xmm3
-; SSE2-NEXT: shlq $51, %r11
+; SSE2-NEXT: shlq $52, %r11
; SSE2-NEXT: sarq $63, %r11
; SSE2-NEXT: movd %r11d, %xmm4
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: shlq $59, %r14
+; SSE2-NEXT: shlq $53, %r14
; SSE2-NEXT: sarq $63, %r14
-; SSE2-NEXT: movd %r14d, %xmm5
+; SSE2-NEXT: movd %r14d, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: shlq $55, %r15
+; SSE2-NEXT: shlq $54, %r15
; SSE2-NEXT: sarq $63, %r15
; SSE2-NEXT: movd %r15d, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSE2-NEXT: shlq $63, %r12
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT: shlq $55, %r12
; SSE2-NEXT: sarq $63, %r12
-; SSE2-NEXT: movd %r12d, %xmm0
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
-; SSE2-NEXT: shlq $50, %r13
+; SSE2-NEXT: movd %r12d, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT: shlq $60, %r13
; SSE2-NEXT: sarq $63, %r13
-; SSE2-NEXT: movd %r13d, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: shlq $58, %rbx
+; SSE2-NEXT: movd %r13d, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: shlq $61, %rbx
; SSE2-NEXT: sarq $63, %rbx
; SSE2-NEXT: movd %ebx, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSE2-NEXT: shlq $54, %rcx
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: shlq $62, %rcx
; SSE2-NEXT: sarq $63, %rcx
-; SSE2-NEXT: movd %ecx, %xmm4
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT: shlq $62, %rdx
+; SSE2-NEXT: movd %ecx, %xmm5
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE2-NEXT: shlq $63, %rdx
; SSE2-NEXT: sarq $63, %rdx
-; SSE2-NEXT: movd %edx, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT: shlq $52, %rsi
+; SSE2-NEXT: movd %edx, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; SSE2-NEXT: shlq $58, %rsi
; SSE2-NEXT: sarq $63, %rsi
-; SSE2-NEXT: movd %esi, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSE2-NEXT: shlq $60, %rdi
+; SSE2-NEXT: movd %esi, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; SSE2-NEXT: shlq $59, %rdi
; SSE2-NEXT: sarq $63, %rdi
; SSE2-NEXT: movd %edi, %xmm4
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
-; SSE2-NEXT: shrq $15, %rbp
-; SSE2-NEXT: movd %ebp, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSE2-NEXT: shlq $57, %rbp
+; SSE2-NEXT: sarq $63, %rbp
+; SSE2-NEXT: movd %ebp, %xmm2
; SSE2-NEXT: shrq $7, %rax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: popq %r12
; SSE2-NEXT: popq %r13
@@ -2640,69 +2640,69 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone {
; SSSE3-NEXT: movq %rax, %rsi
; SSSE3-NEXT: movq %rax, %rdi
; SSSE3-NEXT: movq %rax, %rbp
-; SSSE3-NEXT: shlq $49, %rbp
-; SSSE3-NEXT: sarq $63, %rbp
+; SSSE3-NEXT: shrq $15, %rbp
; SSSE3-NEXT: movd %ebp, %xmm0
; SSSE3-NEXT: movq %rax, %rbp
; SSSE3-NEXT: movsbq %al, %rax
-; SSSE3-NEXT: shlq $57, %r8
+; SSSE3-NEXT: shlq $49, %r8
; SSSE3-NEXT: sarq $63, %r8
; SSSE3-NEXT: movd %r8d, %xmm1
-; SSSE3-NEXT: shlq $53, %r9
+; SSSE3-NEXT: shlq $50, %r9
; SSSE3-NEXT: sarq $63, %r9
; SSSE3-NEXT: movd %r9d, %xmm2
-; SSSE3-NEXT: shlq $61, %r10
+; SSSE3-NEXT: shlq $51, %r10
; SSSE3-NEXT: sarq $63, %r10
; SSSE3-NEXT: movd %r10d, %xmm3
-; SSSE3-NEXT: shlq $51, %r11
+; SSSE3-NEXT: shlq $52, %r11
; SSSE3-NEXT: sarq $63, %r11
; SSSE3-NEXT: movd %r11d, %xmm4
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: shlq $59, %r14
+; SSSE3-NEXT: shlq $53, %r14
; SSSE3-NEXT: sarq $63, %r14
-; SSSE3-NEXT: movd %r14d, %xmm5
+; SSSE3-NEXT: movd %r14d, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT: shlq $55, %r15
+; SSSE3-NEXT: shlq $54, %r15
; SSSE3-NEXT: sarq $63, %r15
; SSSE3-NEXT: movd %r15d, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
-; SSSE3-NEXT: shlq $63, %r12
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSSE3-NEXT: shlq $55, %r12
; SSSE3-NEXT: sarq $63, %r12
-; SSSE3-NEXT: movd %r12d, %xmm0
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
-; SSSE3-NEXT: shlq $50, %r13
+; SSSE3-NEXT: movd %r12d, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSSE3-NEXT: shlq $60, %r13
; SSSE3-NEXT: sarq $63, %r13
-; SSSE3-NEXT: movd %r13d, %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: shlq $58, %rbx
+; SSSE3-NEXT: movd %r13d, %xmm4
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: shlq $61, %rbx
; SSSE3-NEXT: sarq $63, %rbx
; SSSE3-NEXT: movd %ebx, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSSE3-NEXT: shlq $54, %rcx
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: shlq $62, %rcx
; SSSE3-NEXT: sarq $63, %rcx
-; SSSE3-NEXT: movd %ecx, %xmm4
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT: shlq $62, %rdx
+; SSSE3-NEXT: movd %ecx, %xmm5
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSSE3-NEXT: shlq $63, %rdx
; SSSE3-NEXT: sarq $63, %rdx
-; SSSE3-NEXT: movd %edx, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSSE3-NEXT: shlq $52, %rsi
+; SSSE3-NEXT: movd %edx, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; SSSE3-NEXT: shlq $58, %rsi
; SSSE3-NEXT: sarq $63, %rsi
-; SSSE3-NEXT: movd %esi, %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSSE3-NEXT: shlq $60, %rdi
+; SSSE3-NEXT: movd %esi, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
+; SSSE3-NEXT: shlq $59, %rdi
; SSSE3-NEXT: sarq $63, %rdi
; SSSE3-NEXT: movd %edi, %xmm4
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
-; SSSE3-NEXT: shrq $15, %rbp
-; SSSE3-NEXT: movd %ebp, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
+; SSSE3-NEXT: shlq $57, %rbp
+; SSSE3-NEXT: sarq $63, %rbp
+; SSSE3-NEXT: movd %ebp, %xmm2
; SSSE3-NEXT: shrq $7, %rax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSSE3-NEXT: movd %eax, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: popq %rbx
; SSSE3-NEXT: popq %r12
; SSSE3-NEXT: popq %r13
@@ -3002,7 +3002,7 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movzwl (%rdi), %eax
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $14, %ecx
+; SSE2-NEXT: shrl $7, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
@@ -3011,21 +3011,21 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $10, %ecx
+; SSE2-NEXT: shrl $5, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $2, %ecx
+; SSE2-NEXT: shrl $4, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $12, %ecx
+; SSE2-NEXT: shrl $3, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $4, %ecx
+; SSE2-NEXT: shrl $2, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
@@ -3033,18 +3033,18 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm1
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: shrl %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $13, %ecx
+; SSE2-NEXT: shrl $11, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $5, %ecx
+; SSE2-NEXT: shrl $10, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
@@ -3053,31 +3053,31 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm3
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl %ecx
+; SSE2-NEXT: shrl $8, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $11, %ecx
+; SSE2-NEXT: shrl $13, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $3, %ecx
+; SSE2-NEXT: shrl $12, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $7, %ecx
+; SSE2-NEXT: shrl $14, %ecx
; SSE2-NEXT: andl $1, %ecx
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: shrl $15, %eax
; SSE2-NEXT: movzwl %ax, %eax
; SSE2-NEXT: movd %eax, %xmm4
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psllw $15, %xmm0
@@ -3091,7 +3091,7 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movzwl (%rdi), %eax
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $14, %ecx
+; SSSE3-NEXT: shrl $7, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
@@ -3100,21 +3100,21 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $10, %ecx
+; SSSE3-NEXT: shrl $5, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $2, %ecx
+; SSSE3-NEXT: shrl $4, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $12, %ecx
+; SSSE3-NEXT: shrl $3, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $4, %ecx
+; SSSE3-NEXT: shrl $2, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm3
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
@@ -3122,18 +3122,18 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm1
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $8, %ecx
+; SSSE3-NEXT: shrl %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $13, %ecx
+; SSSE3-NEXT: shrl $11, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $5, %ecx
+; SSSE3-NEXT: shrl $10, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
@@ -3142,31 +3142,31 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm3
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl %ecx
+; SSSE3-NEXT: shrl $8, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $11, %ecx
+; SSSE3-NEXT: shrl $13, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $3, %ecx
+; SSSE3-NEXT: shrl $12, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm3
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSSE3-NEXT: movl %eax, %ecx
-; SSSE3-NEXT: shrl $7, %ecx
+; SSSE3-NEXT: shrl $14, %ecx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: shrl $15, %eax
; SSSE3-NEXT: movzwl %ax, %eax
; SSSE3-NEXT: movd %eax, %xmm4
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: psllw $15, %xmm0
@@ -3556,162 +3556,162 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
; SSE2-NEXT: pushq %r13
; SSE2-NEXT: pushq %r12
; SSE2-NEXT: pushq %rbx
-; SSE2-NEXT: movswq (%rdi), %rbx
-; SSE2-NEXT: movq %rbx, %r10
-; SSE2-NEXT: movq %rbx, %r8
-; SSE2-NEXT: movq %rbx, %r9
-; SSE2-NEXT: movq %rbx, %r11
-; SSE2-NEXT: movq %rbx, %r14
-; SSE2-NEXT: movq %rbx, %r15
-; SSE2-NEXT: movq %rbx, %r12
-; SSE2-NEXT: movq %rbx, %r13
-; SSE2-NEXT: movq %rbx, %rdx
-; SSE2-NEXT: movq %rbx, %rsi
-; SSE2-NEXT: movq %rbx, %rcx
-; SSE2-NEXT: movq %rbx, %rbp
-; SSE2-NEXT: movq %rbx, %rax
-; SSE2-NEXT: shlq $49, %rax
-; SSE2-NEXT: sarq $63, %rax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movq %rbx, %rax
-; SSE2-NEXT: shlq $57, %r10
+; SSE2-NEXT: movswq (%rdi), %rax
+; SSE2-NEXT: movq %rax, %r10
+; SSE2-NEXT: movq %rax, %r8
+; SSE2-NEXT: movq %rax, %r9
+; SSE2-NEXT: movq %rax, %r11
+; SSE2-NEXT: movq %rax, %r14
+; SSE2-NEXT: movq %rax, %r15
+; SSE2-NEXT: movq %rax, %r12
+; SSE2-NEXT: movq %rax, %r13
+; SSE2-NEXT: movq %rax, %rdx
+; SSE2-NEXT: movq %rax, %rsi
+; SSE2-NEXT: movq %rax, %rcx
+; SSE2-NEXT: movq %rax, %rbp
+; SSE2-NEXT: movq %rax, %rbx
+; SSE2-NEXT: shrq $15, %rbx
+; SSE2-NEXT: movd %ebx, %xmm0
+; SSE2-NEXT: movq %rax, %rbx
+; SSE2-NEXT: shlq $49, %r10
; SSE2-NEXT: sarq $63, %r10
; SSE2-NEXT: movd %r10d, %xmm15
-; SSE2-NEXT: movq %rbx, %r10
-; SSE2-NEXT: movsbq %bl, %rbx
+; SSE2-NEXT: movq %rax, %r10
+; SSE2-NEXT: movsbq %al, %rax
; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
-; SSE2-NEXT: shlq $53, %r8
+; SSE2-NEXT: shlq $50, %r8
; SSE2-NEXT: sarq $63, %r8
; SSE2-NEXT: movd %r8d, %xmm8
-; SSE2-NEXT: shlq $61, %r9
+; SSE2-NEXT: shlq $51, %r9
; SSE2-NEXT: sarq $63, %r9
-; SSE2-NEXT: movd %r9d, %xmm2
-; SSE2-NEXT: shlq $51, %r11
+; SSE2-NEXT: movd %r9d, %xmm3
+; SSE2-NEXT: shlq $52, %r11
; SSE2-NEXT: sarq $63, %r11
; SSE2-NEXT: movd %r11d, %xmm9
-; SSE2-NEXT: shlq $59, %r14
+; SSE2-NEXT: shlq $53, %r14
; SSE2-NEXT: sarq $63, %r14
-; SSE2-NEXT: movd %r14d, %xmm5
-; SSE2-NEXT: shlq $55, %r15
+; SSE2-NEXT: movd %r14d, %xmm6
+; SSE2-NEXT: shlq $54, %r15
; SSE2-NEXT: sarq $63, %r15
; SSE2-NEXT: movd %r15d, %xmm10
-; SSE2-NEXT: shlq $63, %r12
+; SSE2-NEXT: shlq $55, %r12
; SSE2-NEXT: sarq $63, %r12
-; SSE2-NEXT: movd %r12d, %xmm0
-; SSE2-NEXT: shlq $50, %r13
+; SSE2-NEXT: movd %r12d, %xmm2
+; SSE2-NEXT: shlq $60, %r13
; SSE2-NEXT: sarq $63, %r13
; SSE2-NEXT: movd %r13d, %xmm11
-; SSE2-NEXT: shlq $58, %rdx
+; SSE2-NEXT: shlq $61, %rdx
; SSE2-NEXT: sarq $63, %rdx
-; SSE2-NEXT: movd %edx, %xmm4
-; SSE2-NEXT: shlq $54, %rsi
+; SSE2-NEXT: movd %edx, %xmm5
+; SSE2-NEXT: shlq $62, %rsi
; SSE2-NEXT: sarq $63, %rsi
; SSE2-NEXT: movd %esi, %xmm12
-; SSE2-NEXT: shlq $62, %rcx
+; SSE2-NEXT: shlq $63, %rcx
; SSE2-NEXT: sarq $63, %rcx
-; SSE2-NEXT: movd %ecx, %xmm6
-; SSE2-NEXT: shlq $52, %rbp
+; SSE2-NEXT: movd %ecx, %xmm0
+; SSE2-NEXT: shlq $58, %rbp
; SSE2-NEXT: sarq $63, %rbp
; SSE2-NEXT: movd %ebp, %xmm13
-; SSE2-NEXT: shlq $60, %rax
-; SSE2-NEXT: sarq $63, %rax
-; SSE2-NEXT: movd %eax, %xmm7
-; SSE2-NEXT: shrq $15, %r10
-; SSE2-NEXT: movd %r10d, %xmm14
-; SSE2-NEXT: shrq $7, %rbx
-; SSE2-NEXT: movd %ebx, %xmm3
-; SSE2-NEXT: movswq 2(%rdi), %rdx
-; SSE2-NEXT: movq %rdx, %r8
-; SSE2-NEXT: movq %rdx, %r9
-; SSE2-NEXT: movq %rdx, %r10
-; SSE2-NEXT: movq %rdx, %r11
-; SSE2-NEXT: movq %rdx, %r14
-; SSE2-NEXT: movq %rdx, %r15
-; SSE2-NEXT: movq %rdx, %r12
-; SSE2-NEXT: movq %rdx, %r13
-; SSE2-NEXT: movq %rdx, %rbx
-; SSE2-NEXT: movq %rdx, %rax
-; SSE2-NEXT: movq %rdx, %rcx
-; SSE2-NEXT: movq %rdx, %rsi
-; SSE2-NEXT: movq %rdx, %rdi
-; SSE2-NEXT: movq %rdx, %rbp
-; SSE2-NEXT: shlq $49, %rbp
-; SSE2-NEXT: sarq $63, %rbp
+; SSE2-NEXT: shlq $59, %rbx
+; SSE2-NEXT: sarq $63, %rbx
+; SSE2-NEXT: movd %ebx, %xmm7
+; SSE2-NEXT: shlq $57, %r10
+; SSE2-NEXT: sarq $63, %r10
+; SSE2-NEXT: movd %r10d, %xmm4
+; SSE2-NEXT: shrq $7, %rax
+; SSE2-NEXT: movd %eax, %xmm14
+; SSE2-NEXT: movswq 2(%rdi), %rsi
+; SSE2-NEXT: movq %rsi, %r8
+; SSE2-NEXT: movq %rsi, %r9
+; SSE2-NEXT: movq %rsi, %r10
+; SSE2-NEXT: movq %rsi, %r11
+; SSE2-NEXT: movq %rsi, %r14
+; SSE2-NEXT: movq %rsi, %r15
+; SSE2-NEXT: movq %rsi, %r12
+; SSE2-NEXT: movq %rsi, %r13
+; SSE2-NEXT: movq %rsi, %rbx
+; SSE2-NEXT: movq %rsi, %rax
+; SSE2-NEXT: movq %rsi, %rcx
+; SSE2-NEXT: movq %rsi, %rdx
+; SSE2-NEXT: movq %rsi, %rdi
+; SSE2-NEXT: movq %rsi, %rbp
+; SSE2-NEXT: shrq $15, %rbp
; SSE2-NEXT: movd %ebp, %xmm1
-; SSE2-NEXT: movq %rdx, %rbp
-; SSE2-NEXT: movsbq %dl, %rdx
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
+; SSE2-NEXT: movq %rsi, %rbp
+; SSE2-NEXT: movsbq %sil, %rsi
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7]
-; SSE2-NEXT: shlq $57, %r8
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7]
+; SSE2-NEXT: shlq $49, %r8
; SSE2-NEXT: sarq $63, %r8
-; SSE2-NEXT: movd %r8d, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7]
-; SSE2-NEXT: shlq $53, %r9
+; SSE2-NEXT: movd %r8d, %xmm3
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
+; SSE2-NEXT: shlq $50, %r9
; SSE2-NEXT: sarq $63, %r9
-; SSE2-NEXT: movd %r9d, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSE2-NEXT: shlq $61, %r10
+; SSE2-NEXT: movd %r9d, %xmm4
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSE2-NEXT: shlq $51, %r10
; SSE2-NEXT: sarq $63, %r10
-; SSE2-NEXT: movd %r10d, %xmm4
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
-; SSE2-NEXT: shlq $51, %r11
+; SSE2-NEXT: movd %r10d, %xmm5
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: shlq $52, %r11
; SSE2-NEXT: sarq $63, %r11
-; SSE2-NEXT: movd %r11d, %xmm5
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE2-NEXT: shlq $59, %r14
+; SSE2-NEXT: movd %r11d, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSE2-NEXT: shlq $53, %r14
; SSE2-NEXT: sarq $63, %r14
-; SSE2-NEXT: movd %r14d, %xmm6
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE2-NEXT: shlq $55, %r15
+; SSE2-NEXT: movd %r14d, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
+; SSE2-NEXT: shlq $54, %r15
; SSE2-NEXT: sarq $63, %r15
-; SSE2-NEXT: movd %r15d, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSE2-NEXT: shlq $63, %r12
+; SSE2-NEXT: movd %r15d, %xmm4
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
+; SSE2-NEXT: shlq $55, %r12
; SSE2-NEXT: sarq $63, %r12
-; SSE2-NEXT: movd %r12d, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
-; SSE2-NEXT: shlq $50, %r13
+; SSE2-NEXT: movd %r12d, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: shlq $60, %r13
; SSE2-NEXT: sarq $63, %r13
; SSE2-NEXT: movd %r13d, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSE2-NEXT: shlq $58, %rbx
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSE2-NEXT: shlq $61, %rbx
; SSE2-NEXT: sarq $63, %rbx
-; SSE2-NEXT: movd %ebx, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSE2-NEXT: shlq $54, %rax
+; SSE2-NEXT: movd %ebx, %xmm4
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSE2-NEXT: shlq $62, %rax
; SSE2-NEXT: sarq $63, %rax
-; SSE2-NEXT: movd %eax, %xmm5
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
-; SSE2-NEXT: shlq $62, %rcx
+; SSE2-NEXT: movd %eax, %xmm6
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
+; SSE2-NEXT: shlq $63, %rcx
; SSE2-NEXT: sarq $63, %rcx
-; SSE2-NEXT: movd %ecx, %xmm4
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: shlq $52, %rsi
-; SSE2-NEXT: sarq $63, %rsi
-; SSE2-NEXT: movd %esi, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE2-NEXT: shlq $60, %rdi
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSE2-NEXT: shlq $58, %rdx
+; SSE2-NEXT: sarq $63, %rdx
+; SSE2-NEXT: movd %edx, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSE2-NEXT: shlq $59, %rdi
; SSE2-NEXT: sarq $63, %rdi
-; SSE2-NEXT: movd %edi, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: shrq $15, %rbp
+; SSE2-NEXT: movd %edi, %xmm4
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSE2-NEXT: shlq $57, %rbp
+; SSE2-NEXT: sarq $63, %rbp
; SSE2-NEXT: movd %ebp, %xmm2
-; SSE2-NEXT: shrq $7, %rdx
-; SSE2-NEXT: movd %edx, %xmm5
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSE2-NEXT: shrq $7, %rsi
+; SSE2-NEXT: movd %esi, %xmm5
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: popq %r12
; SSE2-NEXT: popq %r13
@@ -3728,162 +3728,162 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
; SSSE3-NEXT: pushq %r13
; SSSE3-NEXT: pushq %r12
; SSSE3-NEXT: pushq %rbx
-; SSSE3-NEXT: movswq (%rdi), %rbx
-; SSSE3-NEXT: movq %rbx, %r10
-; SSSE3-NEXT: movq %rbx, %r8
-; SSSE3-NEXT: movq %rbx, %r9
-; SSSE3-NEXT: movq %rbx, %r11
-; SSSE3-NEXT: movq %rbx, %r14
-; SSSE3-NEXT: movq %rbx, %r15
-; SSSE3-NEXT: movq %rbx, %r12
-; SSSE3-NEXT: movq %rbx, %r13
-; SSSE3-NEXT: movq %rbx, %rdx
-; SSSE3-NEXT: movq %rbx, %rsi
-; SSSE3-NEXT: movq %rbx, %rcx
-; SSSE3-NEXT: movq %rbx, %rbp
-; SSSE3-NEXT: movq %rbx, %rax
-; SSSE3-NEXT: shlq $49, %rax
-; SSSE3-NEXT: sarq $63, %rax
-; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movq %rbx, %rax
-; SSSE3-NEXT: shlq $57, %r10
+; SSSE3-NEXT: movswq (%rdi), %rax
+; SSSE3-NEXT: movq %rax, %r10
+; SSSE3-NEXT: movq %rax, %r8
+; SSSE3-NEXT: movq %rax, %r9
+; SSSE3-NEXT: movq %rax, %r11
+; SSSE3-NEXT: movq %rax, %r14
+; SSSE3-NEXT: movq %rax, %r15
+; SSSE3-NEXT: movq %rax, %r12
+; SSSE3-NEXT: movq %rax, %r13
+; SSSE3-NEXT: movq %rax, %rdx
+; SSSE3-NEXT: movq %rax, %rsi
+; SSSE3-NEXT: movq %rax, %rcx
+; SSSE3-NEXT: movq %rax, %rbp
+; SSSE3-NEXT: movq %rax, %rbx
+; SSSE3-NEXT: shrq $15, %rbx
+; SSSE3-NEXT: movd %ebx, %xmm0
+; SSSE3-NEXT: movq %rax, %rbx
+; SSSE3-NEXT: shlq $49, %r10
; SSSE3-NEXT: sarq $63, %r10
; SSSE3-NEXT: movd %r10d, %xmm15
-; SSSE3-NEXT: movq %rbx, %r10
-; SSSE3-NEXT: movsbq %bl, %rbx
+; SSSE3-NEXT: movq %rax, %r10
+; SSSE3-NEXT: movsbq %al, %rax
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
-; SSSE3-NEXT: shlq $53, %r8
+; SSSE3-NEXT: shlq $50, %r8
; SSSE3-NEXT: sarq $63, %r8
; SSSE3-NEXT: movd %r8d, %xmm8
-; SSSE3-NEXT: shlq $61, %r9
+; SSSE3-NEXT: shlq $51, %r9
; SSSE3-NEXT: sarq $63, %r9
-; SSSE3-NEXT: movd %r9d, %xmm2
-; SSSE3-NEXT: shlq $51, %r11
+; SSSE3-NEXT: movd %r9d, %xmm3
+; SSSE3-NEXT: shlq $52, %r11
; SSSE3-NEXT: sarq $63, %r11
; SSSE3-NEXT: movd %r11d, %xmm9
-; SSSE3-NEXT: shlq $59, %r14
+; SSSE3-NEXT: shlq $53, %r14
; SSSE3-NEXT: sarq $63, %r14
-; SSSE3-NEXT: movd %r14d, %xmm5
-; SSSE3-NEXT: shlq $55, %r15
+; SSSE3-NEXT: movd %r14d, %xmm6
+; SSSE3-NEXT: shlq $54, %r15
; SSSE3-NEXT: sarq $63, %r15
; SSSE3-NEXT: movd %r15d, %xmm10
-; SSSE3-NEXT: shlq $63, %r12
+; SSSE3-NEXT: shlq $55, %r12
; SSSE3-NEXT: sarq $63, %r12
-; SSSE3-NEXT: movd %r12d, %xmm0
-; SSSE3-NEXT: shlq $50, %r13
+; SSSE3-NEXT: movd %r12d, %xmm2
+; SSSE3-NEXT: shlq $60, %r13
; SSSE3-NEXT: sarq $63, %r13
; SSSE3-NEXT: movd %r13d, %xmm11
-; SSSE3-NEXT: shlq $58, %rdx
+; SSSE3-NEXT: shlq $61, %rdx
; SSSE3-NEXT: sarq $63, %rdx
-; SSSE3-NEXT: movd %edx, %xmm4
-; SSSE3-NEXT: shlq $54, %rsi
+; SSSE3-NEXT: movd %edx, %xmm5
+; SSSE3-NEXT: shlq $62, %rsi
; SSSE3-NEXT: sarq $63, %rsi
; SSSE3-NEXT: movd %esi, %xmm12
-; SSSE3-NEXT: shlq $62, %rcx
+; SSSE3-NEXT: shlq $63, %rcx
; SSSE3-NEXT: sarq $63, %rcx
-; SSSE3-NEXT: movd %ecx, %xmm6
-; SSSE3-NEXT: shlq $52, %rbp
+; SSSE3-NEXT: movd %ecx, %xmm0
+; SSSE3-NEXT: shlq $58, %rbp
; SSSE3-NEXT: sarq $63, %rbp
; SSSE3-NEXT: movd %ebp, %xmm13
-; SSSE3-NEXT: shlq $60, %rax
-; SSSE3-NEXT: sarq $63, %rax
-; SSSE3-NEXT: movd %eax, %xmm7
-; SSSE3-NEXT: shrq $15, %r10
-; SSSE3-NEXT: movd %r10d, %xmm14
-; SSSE3-NEXT: shrq $7, %rbx
-; SSSE3-NEXT: movd %ebx, %xmm3
-; SSSE3-NEXT: movswq 2(%rdi), %rdx
-; SSSE3-NEXT: movq %rdx, %r8
-; SSSE3-NEXT: movq %rdx, %r9
-; SSSE3-NEXT: movq %rdx, %r10
-; SSSE3-NEXT: movq %rdx, %r11
-; SSSE3-NEXT: movq %rdx, %r14
-; SSSE3-NEXT: movq %rdx, %r15
-; SSSE3-NEXT: movq %rdx, %r12
-; SSSE3-NEXT: movq %rdx, %r13
-; SSSE3-NEXT: movq %rdx, %rbx
-; SSSE3-NEXT: movq %rdx, %rax
-; SSSE3-NEXT: movq %rdx, %rcx
-; SSSE3-NEXT: movq %rdx, %rsi
-; SSSE3-NEXT: movq %rdx, %rdi
-; SSSE3-NEXT: movq %rdx, %rbp
-; SSSE3-NEXT: shlq $49, %rbp
-; SSSE3-NEXT: sarq $63, %rbp
+; SSSE3-NEXT: shlq $59, %rbx
+; SSSE3-NEXT: sarq $63, %rbx
+; SSSE3-NEXT: movd %ebx, %xmm7
+; SSSE3-NEXT: shlq $57, %r10
+; SSSE3-NEXT: sarq $63, %r10
+; SSSE3-NEXT: movd %r10d, %xmm4
+; SSSE3-NEXT: shrq $7, %rax
+; SSSE3-NEXT: movd %eax, %xmm14
+; SSSE3-NEXT: movswq 2(%rdi), %rsi
+; SSSE3-NEXT: movq %rsi, %r8
+; SSSE3-NEXT: movq %rsi, %r9
+; SSSE3-NEXT: movq %rsi, %r10
+; SSSE3-NEXT: movq %rsi, %r11
+; SSSE3-NEXT: movq %rsi, %r14
+; SSSE3-NEXT: movq %rsi, %r15
+; SSSE3-NEXT: movq %rsi, %r12
+; SSSE3-NEXT: movq %rsi, %r13
+; SSSE3-NEXT: movq %rsi, %rbx
+; SSSE3-NEXT: movq %rsi, %rax
+; SSSE3-NEXT: movq %rsi, %rcx
+; SSSE3-NEXT: movq %rsi, %rdx
+; SSSE3-NEXT: movq %rsi, %rdi
+; SSSE3-NEXT: movq %rsi, %rbp
+; SSSE3-NEXT: shrq $15, %rbp
; SSSE3-NEXT: movd %ebp, %xmm1
-; SSSE3-NEXT: movq %rdx, %rbp
-; SSSE3-NEXT: movsbq %dl, %rdx
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
+; SSSE3-NEXT: movq %rsi, %rbp
+; SSSE3-NEXT: movsbq %sil, %rsi
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7]
-; SSSE3-NEXT: shlq $57, %r8
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7]
+; SSSE3-NEXT: shlq $49, %r8
; SSSE3-NEXT: sarq $63, %r8
-; SSSE3-NEXT: movd %r8d, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7]
-; SSSE3-NEXT: shlq $53, %r9
+; SSSE3-NEXT: movd %r8d, %xmm3
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
+; SSSE3-NEXT: shlq $50, %r9
; SSSE3-NEXT: sarq $63, %r9
-; SSSE3-NEXT: movd %r9d, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSSE3-NEXT: shlq $61, %r10
+; SSSE3-NEXT: movd %r9d, %xmm4
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSSE3-NEXT: shlq $51, %r10
; SSSE3-NEXT: sarq $63, %r10
-; SSSE3-NEXT: movd %r10d, %xmm4
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
-; SSSE3-NEXT: shlq $51, %r11
+; SSSE3-NEXT: movd %r10d, %xmm5
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSSE3-NEXT: shlq $52, %r11
; SSSE3-NEXT: sarq $63, %r11
-; SSSE3-NEXT: movd %r11d, %xmm5
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSSE3-NEXT: shlq $59, %r14
+; SSSE3-NEXT: movd %r11d, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SSSE3-NEXT: shlq $53, %r14
; SSSE3-NEXT: sarq $63, %r14
-; SSSE3-NEXT: movd %r14d, %xmm6
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSSE3-NEXT: shlq $55, %r15
+; SSSE3-NEXT: movd %r14d, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
+; SSSE3-NEXT: shlq $54, %r15
; SSSE3-NEXT: sarq $63, %r15
-; SSSE3-NEXT: movd %r15d, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; SSSE3-NEXT: shlq $63, %r12
+; SSSE3-NEXT: movd %r15d, %xmm4
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
+; SSSE3-NEXT: shlq $55, %r12
; SSSE3-NEXT: sarq $63, %r12
-; SSSE3-NEXT: movd %r12d, %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
-; SSSE3-NEXT: shlq $50, %r13
+; SSSE3-NEXT: movd %r12d, %xmm3
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: shlq $60, %r13
; SSSE3-NEXT: sarq $63, %r13
; SSSE3-NEXT: movd %r13d, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
-; SSSE3-NEXT: shlq $58, %rbx
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
+; SSSE3-NEXT: shlq $61, %rbx
; SSSE3-NEXT: sarq $63, %rbx
-; SSSE3-NEXT: movd %ebx, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
-; SSSE3-NEXT: shlq $54, %rax
+; SSSE3-NEXT: movd %ebx, %xmm4
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; SSSE3-NEXT: shlq $62, %rax
; SSSE3-NEXT: sarq $63, %rax
-; SSSE3-NEXT: movd %eax, %xmm5
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
-; SSSE3-NEXT: shlq $62, %rcx
+; SSSE3-NEXT: movd %eax, %xmm6
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
+; SSSE3-NEXT: shlq $63, %rcx
; SSSE3-NEXT: sarq $63, %rcx
-; SSSE3-NEXT: movd %ecx, %xmm4
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT: shlq $52, %rsi
-; SSSE3-NEXT: sarq $63, %rsi
-; SSSE3-NEXT: movd %esi, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSSE3-NEXT: shlq $60, %rdi
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSSE3-NEXT: shlq $58, %rdx
+; SSSE3-NEXT: sarq $63, %rdx
+; SSSE3-NEXT: movd %edx, %xmm2
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
+; SSSE3-NEXT: shlq $59, %rdi
; SSSE3-NEXT: sarq $63, %rdi
-; SSSE3-NEXT: movd %edi, %xmm3
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSSE3-NEXT: shrq $15, %rbp
+; SSSE3-NEXT: movd %edi, %xmm4
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; SSSE3-NEXT: shlq $57, %rbp
+; SSSE3-NEXT: sarq $63, %rbp
; SSSE3-NEXT: movd %ebp, %xmm2
-; SSSE3-NEXT: shrq $7, %rdx
-; SSSE3-NEXT: movd %edx, %xmm5
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; SSSE3-NEXT: shrq $7, %rsi
+; SSSE3-NEXT: movd %esi, %xmm5
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT: popq %rbx
; SSSE3-NEXT: popq %r12
; SSSE3-NEXT: popq %r13
diff --git a/test/CodeGen/X86/vector-shuffle-v48.ll b/test/CodeGen/X86/vector-shuffle-v48.ll
new file mode 100644
index 000000000000..9bd75148ecd1
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-v48.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-pc-linux -mattr=+avx2 < %s | FileCheck %s
+define <16 x i8> @foo(<48 x i8>* %x0, <16 x i32> %x1, <16 x i32> %x2) { ; stride-3 byte gather from a 48-byte load, then zext -> mul -> lshr -> trunc
+; CHECK-LABEL: foo:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovdqu (%rdi), %ymm4
+; CHECK-NEXT: vmovdqu 32(%rdi), %xmm5
+; CHECK-NEXT: vpextrb $13, %xmm5, %eax
+; CHECK-NEXT: vpextrb $10, %xmm5, %ecx
+; CHECK-NEXT: vpextrb $7, %xmm5, %edx
+; CHECK-NEXT: vpextrb $4, %xmm5, %esi
+; CHECK-NEXT: vpextrb $1, %xmm5, %edi
+; CHECK-NEXT: vextracti128 $1, %ymm4, %xmm5
+; CHECK-NEXT: vpshufb {{.*#+}} xmm6 = xmm5[2,2,5,5,5,5,3,3,4,4,5,5,6,6,7,7]
+; CHECK-NEXT: vpshufb {{.*#+}} xmm7 = xmm4[12,12,13,13,15,15,15,15,12,12,13,13,14,14,15,15]
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm6 = xmm7[0],xmm6[0]
+; CHECK-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[0,0,1,1,3,3,3,3,6,6,9,9,9,9,7,7]
+; CHECK-NEXT: vinserti128 $1, %xmm6, %ymm4, %ymm4
+; CHECK-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
+; CHECK-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[8,11,14],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vpinsrb $3, %edi, %xmm5, %xmm5
+; CHECK-NEXT: vpinsrb $4, %esi, %xmm5, %xmm5
+; CHECK-NEXT: vpinsrb $5, %edx, %xmm5, %xmm5
+; CHECK-NEXT: vpinsrb $6, %ecx, %xmm5, %xmm5
+; CHECK-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
+; CHECK-NEXT: vpmovzxbd {{.*#+}} ymm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero,xmm5[4],zero,zero,zero,xmm5[5],zero,zero,zero,xmm5[6],zero,zero,zero,xmm5[7],zero,zero,zero
+; CHECK-NEXT: vpmulld %ymm0, %ymm4, %ymm0
+; CHECK-NEXT: vpmulld %ymm1, %ymm5, %ymm1
+; CHECK-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: vpsrlvd %ymm3, %ymm1, %ymm1
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
+; CHECK-NEXT: vpshufb %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; CHECK-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; CHECK-NEXT: vpshufb %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; CHECK-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %1 = load <48 x i8>, <48 x i8>* %x0, align 1 ; whole 48-byte vector, byte-aligned (align 1) load
+ %2 = shufflevector <48 x i8> %1, <48 x i8> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> ; keep every third byte: indices 0,3,...,45 -> 16 lanes
+ %3 = zext <16 x i8> %2 to <16 x i32> ; zero-extend each selected byte to i32
+ %4 = mul <16 x i32> %3, %x1 ; lane-wise multiply by %x1
+ %5 = lshr <16 x i32> %4, %x2 ; lane-wise logical right shift, per-lane amounts from %x2
+ %6 = trunc <16 x i32> %5 to <16 x i8> ; truncate each lane back to i8
+ ret <16 x i8> %6 ; return the 16 truncated bytes
+}
diff --git a/test/CodeGen/X86/vector-shuffle-variable-128.ll b/test/CodeGen/X86/vector-shuffle-variable-128.ll
index bde8a16d2a5a..452f387a4fee 100644
--- a/test/CodeGen/X86/vector-shuffle-variable-128.ll
+++ b/test/CodeGen/X86/vector-shuffle-variable-128.ll
@@ -83,7 +83,7 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
@@ -103,7 +103,7 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3
; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
@@ -168,7 +168,7 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
@@ -188,7 +188,7 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i
; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
@@ -257,27 +257,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
; SSE2-NEXT: andl $7, %eax
; SSE2-NEXT: movzwl -24(%rsp,%rax,2), %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movzwl -24(%rsp,%rcx,2), %eax
+; SSE2-NEXT: movzwl -24(%rsp,%r10,2), %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: movzwl -24(%rsp,%r9,2), %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax
+; SSE2-NEXT: movzwl -24(%rsp,%r8,2), %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT: movzwl -24(%rsp,%r10,2), %eax
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: movzwl -24(%rsp,%rcx,2), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: movzwl -24(%rsp,%rdx,2), %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movzwl -24(%rsp,%r8,2), %eax
+; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: movzwl -24(%rsp,%rdi,2), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16:
@@ -301,27 +301,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
; SSSE3-NEXT: andl $7, %eax
; SSSE3-NEXT: movzwl -24(%rsp,%rax,2), %eax
; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movzwl -24(%rsp,%rcx,2), %eax
+; SSSE3-NEXT: movzwl -24(%rsp,%r10,2), %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: movzwl -24(%rsp,%r9,2), %eax
; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax
+; SSSE3-NEXT: movzwl -24(%rsp,%r8,2), %eax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT: movzwl -24(%rsp,%r10,2), %eax
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: movzwl -24(%rsp,%rcx,2), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: movzwl -24(%rsp,%rdx,2), %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: movzwl -24(%rsp,%r8,2), %eax
+; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax
; SSSE3-NEXT: movd %eax, %xmm3
; SSSE3-NEXT: movzwl -24(%rsp,%rdi,2), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16:
@@ -425,67 +425,67 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm9
-; SSE2-NEXT: andl $15, %ecx
-; SSE2-NEXT: movzbl (%rcx,%r10), %eax
+; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; SSE2-NEXT: andl $15, %eax
+; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm10
-; SSE2-NEXT: andl $15, %r9d
-; SSE2-NEXT: movzbl (%r9,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm7
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm11
-; SSE2-NEXT: andl $15, %esi
-; SSE2-NEXT: movzbl (%rsi,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm6
+; SSE2-NEXT: movd %eax, %xmm7
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm12
+; SSE2-NEXT: movd %eax, %xmm11
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm5
+; SSE2-NEXT: movd %eax, %xmm6
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm13
-; SSE2-NEXT: andl $15, %edx
-; SSE2-NEXT: movzbl (%rdx,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: movd %eax, %xmm12
; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: andl $15, %eax
; SSE2-NEXT: movzbl (%rax,%r10), %eax
-; SSE2-NEXT: movd %eax, %xmm14
+; SSE2-NEXT: movd %eax, %xmm5
+; SSE2-NEXT: andl $15, %r9d
+; SSE2-NEXT: movzbl (%r9,%r10), %eax
+; SSE2-NEXT: movd %eax, %xmm13
; SSE2-NEXT: andl $15, %r8d
; SSE2-NEXT: movzbl (%r8,%r10), %eax
+; SSE2-NEXT: movd %eax, %xmm4
+; SSE2-NEXT: andl $15, %ecx
+; SSE2-NEXT: movzbl (%rcx,%r10), %eax
+; SSE2-NEXT: movd %eax, %xmm14
+; SSE2-NEXT: andl $15, %edx
+; SSE2-NEXT: movzbl (%rdx,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT: andl $15, %eax
-; SSE2-NEXT: movzbl (%rax,%r10), %eax
+; SSE2-NEXT: andl $15, %esi
+; SSE2-NEXT: movzbl (%rsi,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: andl $15, %edi
; SSE2-NEXT: movzbl (%rdi,%r10), %eax
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@@ -510,67 +510,67 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm9
-; SSSE3-NEXT: andl $15, %ecx
-; SSSE3-NEXT: movzbl (%rcx,%r10), %eax
+; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; SSSE3-NEXT: andl $15, %eax
+; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm3
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm10
-; SSSE3-NEXT: andl $15, %r9d
-; SSSE3-NEXT: movzbl (%r9,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm7
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm11
-; SSSE3-NEXT: andl $15, %esi
-; SSSE3-NEXT: movzbl (%rsi,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm6
+; SSSE3-NEXT: movd %eax, %xmm7
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm12
+; SSSE3-NEXT: movd %eax, %xmm11
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm5
+; SSSE3-NEXT: movd %eax, %xmm6
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm13
-; SSSE3-NEXT: andl $15, %edx
-; SSSE3-NEXT: movzbl (%rdx,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm4
+; SSSE3-NEXT: movd %eax, %xmm12
; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; SSSE3-NEXT: andl $15, %eax
; SSSE3-NEXT: movzbl (%rax,%r10), %eax
-; SSSE3-NEXT: movd %eax, %xmm14
+; SSSE3-NEXT: movd %eax, %xmm5
+; SSSE3-NEXT: andl $15, %r9d
+; SSSE3-NEXT: movzbl (%r9,%r10), %eax
+; SSSE3-NEXT: movd %eax, %xmm13
; SSSE3-NEXT: andl $15, %r8d
; SSSE3-NEXT: movzbl (%r8,%r10), %eax
+; SSSE3-NEXT: movd %eax, %xmm4
+; SSSE3-NEXT: andl $15, %ecx
+; SSSE3-NEXT: movzbl (%rcx,%r10), %eax
+; SSSE3-NEXT: movd %eax, %xmm14
+; SSSE3-NEXT: andl $15, %edx
+; SSSE3-NEXT: movzbl (%rdx,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
-; SSSE3-NEXT: andl $15, %eax
-; SSSE3-NEXT: movzbl (%rax,%r10), %eax
+; SSSE3-NEXT: andl $15, %esi
+; SSSE3-NEXT: movzbl (%rsi,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: andl $15, %edi
; SSSE3-NEXT: movzbl (%rdi,%r10), %eax
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@@ -739,7 +739,7 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
@@ -759,7 +759,7 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi
; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
@@ -824,23 +824,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSE2-NEXT: leaq -{{[0-9]+}}(%rsp), %rcx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm8
-; SSE2-NEXT: movzbl 7(%rdi), %edx
+; SSE2-NEXT: movzbl 14(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm15
-; SSE2-NEXT: movzbl 11(%rdi), %edx
+; SSE2-NEXT: movzbl 13(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm9
-; SSE2-NEXT: movzbl 3(%rdi), %edx
+; SSE2-NEXT: movzbl 12(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm3
-; SSE2-NEXT: movzbl 13(%rdi), %edx
+; SSE2-NEXT: movzbl 11(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm10
-; SSE2-NEXT: movzbl 5(%rdi), %edx
+; SSE2-NEXT: movzbl 10(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm7
@@ -848,11 +848,11 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm11
-; SSE2-NEXT: movzbl 1(%rdi), %edx
+; SSE2-NEXT: movzbl 8(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm6
-; SSE2-NEXT: movzbl 14(%rdi), %edx
+; SSE2-NEXT: movzbl 7(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm12
@@ -860,23 +860,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm5
-; SSE2-NEXT: movzbl 10(%rdi), %edx
+; SSE2-NEXT: movzbl 5(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm13
-; SSE2-NEXT: movzbl 2(%rdi), %edx
+; SSE2-NEXT: movzbl 4(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm4
-; SSE2-NEXT: movzbl 12(%rdi), %edx
+; SSE2-NEXT: movzbl 3(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm14
-; SSE2-NEXT: movzbl 4(%rdi), %edx
+; SSE2-NEXT: movzbl 2(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm1
-; SSE2-NEXT: movzbl 8(%rdi), %edx
+; SSE2-NEXT: movzbl 1(%rdi), %edx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movzbl (%rdx,%rcx), %edx
; SSE2-NEXT: movd %edx, %xmm2
@@ -885,19 +885,19 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@@ -909,23 +909,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSSE3-NEXT: leaq -{{[0-9]+}}(%rsp), %rcx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm8
-; SSSE3-NEXT: movzbl 7(%rdi), %edx
+; SSSE3-NEXT: movzbl 14(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm15
-; SSSE3-NEXT: movzbl 11(%rdi), %edx
+; SSSE3-NEXT: movzbl 13(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm9
-; SSSE3-NEXT: movzbl 3(%rdi), %edx
+; SSSE3-NEXT: movzbl 12(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm3
-; SSSE3-NEXT: movzbl 13(%rdi), %edx
+; SSSE3-NEXT: movzbl 11(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm10
-; SSSE3-NEXT: movzbl 5(%rdi), %edx
+; SSSE3-NEXT: movzbl 10(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm7
@@ -933,11 +933,11 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm11
-; SSSE3-NEXT: movzbl 1(%rdi), %edx
+; SSSE3-NEXT: movzbl 8(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm6
-; SSSE3-NEXT: movzbl 14(%rdi), %edx
+; SSSE3-NEXT: movzbl 7(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm12
@@ -945,23 +945,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm5
-; SSSE3-NEXT: movzbl 10(%rdi), %edx
+; SSSE3-NEXT: movzbl 5(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm13
-; SSSE3-NEXT: movzbl 2(%rdi), %edx
+; SSSE3-NEXT: movzbl 4(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm4
-; SSSE3-NEXT: movzbl 12(%rdi), %edx
+; SSSE3-NEXT: movzbl 3(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm14
-; SSSE3-NEXT: movzbl 4(%rdi), %edx
+; SSSE3-NEXT: movzbl 2(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm1
-; SSSE3-NEXT: movzbl 8(%rdi), %edx
+; SSSE3-NEXT: movzbl 1(%rdi), %edx
; SSSE3-NEXT: andl $15, %edx
; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx
; SSSE3-NEXT: movd %edx, %xmm2
@@ -970,19 +970,19 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8*
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
@@ -1225,28 +1225,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
; SSE2-NEXT: andl $7, %ecx
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: andl $7, %r8d
-; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: andl $7, %r9d
; SSE2-NEXT: movzwl -24(%rsp,%rcx,2), %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: movzwl -24(%rsp,%r9,2), %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax
-; SSE2-NEXT: movd %eax, %xmm3
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE2-NEXT: movzwl -40(%rsp,%rdx,2), %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE2-NEXT: movzwl -40(%rsp,%r8,2), %eax
; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax
+; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: movzwl -40(%rsp,%rdi,2), %eax
; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movzwl -24(%rsp,%r9,2), %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movzwl -40(%rsp,%r8,2), %eax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
@@ -1263,28 +1262,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
; SSSE3-NEXT: andl $7, %ecx
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: andl $7, %r8d
-; SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
+; SSSE3-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: andl $7, %r9d
; SSSE3-NEXT: movzwl -24(%rsp,%rcx,2), %eax
; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: pxor %xmm1, %xmm1
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: movzwl -24(%rsp,%r9,2), %eax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax
-; SSSE3-NEXT: movd %eax, %xmm3
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSSE3-NEXT: movzwl -40(%rsp,%rdx,2), %eax
-; SSSE3-NEXT: movd %eax, %xmm2
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSSE3-NEXT: movzwl -40(%rsp,%r8,2), %eax
; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax
+; SSSE3-NEXT: movd %eax, %xmm2
; SSSE3-NEXT: movzwl -40(%rsp,%rdi,2), %eax
; SSSE3-NEXT: movd %eax, %xmm0
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: movzwl -24(%rsp,%r9,2), %eax
+; SSSE3-NEXT: movd %eax, %xmm1
+; SSSE3-NEXT: movzwl -40(%rsp,%r8,2), %eax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
diff --git a/test/CodeGen/X86/vector-sqrt.ll b/test/CodeGen/X86/vector-sqrt.ll
index 13088b7fa5f2..c5ac4466b5fa 100644
--- a/test/CodeGen/X86/vector-sqrt.ll
+++ b/test/CodeGen/X86/vector-sqrt.ll
@@ -5,10 +5,8 @@
define <2 x double> @sqrtd2(double* nocapture readonly %v) local_unnamed_addr #0 {
; CHECK-LABEL: sqrtd2:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: vsqrtsd 8(%rdi), %xmm1, %xmm1
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
entry:
@@ -29,14 +27,10 @@ declare double @sqrt(double) local_unnamed_addr #1
define <4 x float> @sqrtf4(float* nocapture readonly %v) local_unnamed_addr #0 {
; CHECK-LABEL: sqrtf4:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
-; CHECK-NEXT: vsqrtss %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: vsqrtss %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: vsqrtss %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vsqrtss (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: vsqrtss 4(%rdi), %xmm1, %xmm1
+; CHECK-NEXT: vsqrtss 8(%rdi), %xmm2, %xmm2
+; CHECK-NEXT: vsqrtss 12(%rdi), %xmm3, %xmm3
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
diff --git a/test/CodeGen/X86/vector-unsigned-cmp.ll b/test/CodeGen/X86/vector-unsigned-cmp.ll
index fc246669992c..3e4b9aedf2b8 100644
--- a/test/CodeGen/X86/vector-unsigned-cmp.ll
+++ b/test/CodeGen/X86/vector-unsigned-cmp.ll
@@ -13,7 +13,7 @@ define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
@@ -30,9 +30,6 @@ define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
@@ -46,7 +43,7 @@ define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
@@ -63,9 +60,6 @@ define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
@@ -79,7 +73,7 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
@@ -98,9 +92,6 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
@@ -116,7 +107,7 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlq $1, %xmm0
; SSE-NEXT: psrlq $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
@@ -135,9 +126,6 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
@@ -153,31 +141,15 @@ define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrld $1, %xmm0
; SSE-NEXT: psrld $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE-NEXT: pxor %xmm2, %xmm1
-; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: ugt_v4i32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: ugt_v4i32:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: ugt_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
%cmp = icmp ugt <4 x i32> %sh1, %sh2
@@ -189,32 +161,16 @@ define <4 x i1> @ult_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrld $1, %xmm0
; SSE-NEXT: psrld $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE-NEXT: pxor %xmm2, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: ult_v4i32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: ult_v4i32:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: retq
+; AVX-LABEL: ult_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
%sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
%sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
%cmp = icmp ult <4 x i32> %sh1, %sh2
@@ -226,12 +182,9 @@ define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2: # BB#0:
; SSE2-NEXT: psrld $1, %xmm0
; SSE2-NEXT: psrld $1, %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm2
-; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: uge_v4i32:
@@ -260,9 +213,6 @@ define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2: # BB#0:
; SSE2-NEXT: psrld $1, %xmm0
; SSE2-NEXT: psrld $1, %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm0
@@ -294,9 +244,6 @@ define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlw $1, %xmm0
; SSE-NEXT: psrlw $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE-NEXT: pxor %xmm2, %xmm1
-; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: pcmpgtw %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -304,9 +251,6 @@ define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -320,20 +264,14 @@ define <8 x i1> @ult_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE: # BB#0:
; SSE-NEXT: psrlw $1, %xmm0
; SSE-NEXT: psrlw $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE-NEXT: pxor %xmm2, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm2
-; SSE-NEXT: pcmpgtw %xmm0, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pcmpgtw %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ult_v8i16:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -408,22 +346,20 @@ define <16 x i1> @ugt_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ugt_v16i8:
; SSE: # BB#0:
; SSE-NEXT: psrlw $1, %xmm0
-; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: psrlw $1, %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE-NEXT: por %xmm2, %xmm1
-; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: ugt_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -436,11 +372,10 @@ define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE-LABEL: ult_v16i8:
; SSE: # BB#0:
; SSE-NEXT: psrlw $1, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: psrlw $1, %xmm1
-; SSE-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE-NEXT: por %xmm2, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pand %xmm1, %xmm2
; SSE-NEXT: pcmpgtb %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
@@ -448,11 +383,10 @@ define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
; AVX-LABEL: ult_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
%sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
diff --git a/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll b/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
index 0eb17fb6c14d..c1d242575253 100644
--- a/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
+++ b/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
@@ -15,5 +15,5 @@ body:
; PRE-RA-NEXT: - { reg: '%esi', virtual-reg: '%1' }
; POST-RA: liveins:
-; POST-RA-NEXT: - { reg: '%edi' }
-; POST-RA-NEXT: - { reg: '%esi' }
+; POST-RA-NEXT: - { reg: '%edi', virtual-reg: '' }
+; POST-RA-NEXT: - { reg: '%esi', virtual-reg: '' }
diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll
index c9a34de12369..a31adc337906 100644
--- a/test/CodeGen/X86/vshift-1.ll
+++ b/test/CodeGen/X86/vshift-1.ll
@@ -28,12 +28,9 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
; X32-LABEL: shift1b:
; X32: # BB#0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
-; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-NEXT: psllq %xmm2, %xmm0
+; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; X32-NEXT: psllq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
;
diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll
index 88cba8a4d6ac..a381637b40a9 100644
--- a/test/CodeGen/X86/vshift-2.ll
+++ b/test/CodeGen/X86/vshift-2.ll
@@ -28,12 +28,9 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
; X32-LABEL: shift1b:
; X32: # BB#0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
-; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X32-NEXT: psrlq %xmm2, %xmm0
+; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; X32-NEXT: psrlq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
;
diff --git a/test/CodeGen/X86/x86-interleaved-access.ll b/test/CodeGen/X86/x86-interleaved-access.ll
index 4181a374c61c..74214aa1b8b7 100644
--- a/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/test/CodeGen/X86/x86-interleaved-access.ll
@@ -135,3 +135,96 @@ define <4 x i64> @load_factori64_4(<16 x i64>* %ptr) {
%add3 = add <4 x i64> %add2, %strided.v3
ret <4 x i64> %add3
}
+
+define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
+; AVX-LABEL: store_factorf64_4:
+; AVX: # BB#0:
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm2[0],xmm3[0]
+; AVX-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm4
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm5 = xmm0[0],xmm1[0]
+; AVX-NEXT: vblendpd {{.*#+}} ymm4 = ymm5[0,1],ymm4[2,3]
+; AVX-NEXT: vunpckhpd {{.*#+}} xmm5 = xmm2[1],xmm3[1]
+; AVX-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
+; AVX-NEXT: vunpckhpd {{.*#+}} xmm6 = xmm0[1],xmm1[1]
+; AVX-NEXT: vblendpd {{.*#+}} ymm5 = ymm6[0,1],ymm5[2,3]
+; AVX-NEXT: vunpcklpd {{.*#+}} ymm6 = ymm2[0],ymm3[0],ymm2[2],ymm3[2]
+; AVX-NEXT: vunpcklpd {{.*#+}} ymm7 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX-NEXT: vextractf128 $1, %ymm7, %xmm7
+; AVX-NEXT: vblendpd {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3]
+; AVX-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm3[1],ymm2[3],ymm3[3]
+; AVX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3]
+; AVX-NEXT: vmovupd %ymm0, 96(%rdi)
+; AVX-NEXT: vmovupd %ymm6, 64(%rdi)
+; AVX-NEXT: vmovupd %ymm5, 32(%rdi)
+; AVX-NEXT: vmovupd %ymm4, (%rdi)
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+ store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16
+ ret void
+}
+
+define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) {
+; AVX1-LABEL: store_factori64_4:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm2[0],xmm3[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm4
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm4 = ymm5[0,1],ymm4[2,3]
+; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm5 = xmm2[1],xmm3[1]
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
+; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm6 = xmm0[1],xmm1[1]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm5 = ymm6[0,1],ymm5[2,3]
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm6 = ymm2[0],ymm3[0],ymm2[2],ymm3[2]
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm7 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7
+; AVX1-NEXT: vblendpd {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3]
+; AVX1-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm3[1],ymm2[3],ymm3[3]
+; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3]
+; AVX1-NEXT: vmovupd %ymm0, 96(%rdi)
+; AVX1-NEXT: vmovupd %ymm6, 64(%rdi)
+; AVX1-NEXT: vmovupd %ymm5, 32(%rdi)
+; AVX1-NEXT: vmovupd %ymm4, (%rdi)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: store_factori64_4:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm4 = ymm2[0],ymm3[0],ymm2[2],ymm3[2]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm5
+; AVX2-NEXT: vpermq {{.*#+}} ymm6 = ymm1[0,2,2,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm5 = xmm5[0,1],xmm6[2,3]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7]
+; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm5 = ymm2[1],ymm3[1],ymm2[3],ymm3[3]
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm6
+; AVX2-NEXT: vpermq {{.*#+}} ymm7 = ymm0[3,1,2,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm6 = xmm7[0,1],xmm6[2,3]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm6
+; AVX2-NEXT: vpbroadcastq %xmm3, %ymm7
+; AVX2-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm7[6,7]
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm7 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm6 = ymm7[0,1,2,3],ymm6[4,5,6,7]
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
+; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,1,1,3]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm3[6,7]
+; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
+; AVX2-NEXT: vmovdqu %ymm6, (%rdi)
+; AVX2-NEXT: vmovdqu %ymm5, 96(%rdi)
+; AVX2-NEXT: vmovdqu %ymm4, 64(%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+ %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
+ store <16 x i64> %interleaved.vec, <16 x i64>* %ptr, align 16
+ ret void
+}