diff options
Diffstat (limited to 'test/CodeGen/X86')
84 files changed, 7887 insertions, 3236 deletions
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll index b1deb2c5f567..e04d10c9d64a 100644 --- a/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats 2>&1 | \ -; RUN: grep "asm-printer" | grep 35 +; RUN: grep "asm-printer" | grep 33 target datalayout = "e-p:32:32" define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind { diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll b/test/CodeGen/X86/GlobalISel/irtranslator-call.ll deleted file mode 100644 index 6c60aed67a7b..000000000000 --- a/test/CodeGen/X86/GlobalISel/irtranslator-call.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: llc -mtriple i386 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s -; RUN: llc -mtriple x86_64 -global-isel -stop-after=irtranslator %s -o - | FileCheck %s - -define void @test_void_return() { -; CHECK-LABEL: name: test_void_return -; CHECK: alignment: 4 -; CHECK-NEXT: exposesReturnsTwice: false -; CHECK-NEXT: legalized: false -; CHECK-NEXT: regBankSelected: false -; CHECK-NEXT: selected: false -; CHECK-NEXT: tracksRegLiveness: true -; CHECK-NEXT: frameInfo: -; CHECK-NEXT: isFrameAddressTaken: false -; CHECK-NEXT: isReturnAddressTaken: false -; CHECK-NEXT: hasStackMap: false -; CHECK-NEXT: hasPatchPoint: false -; CHECK-NEXT: stackSize: 0 -; CHECK-NEXT: offsetAdjustment: 0 -; CHECK-NEXT: maxAlignment: 0 -; CHECK-NEXT: adjustsStack: false -; CHECK-NEXT: hasCalls: false -; CHECK-NEXT: hasOpaqueSPAdjustment: false -; CHECK-NEXT: hasVAStart: false -; CHECK-NEXT: hasMustTailInVarArgFunc: false -; CHECK-NEXT: body: -; CHECK-NEXT: bb.1.entry: -; CHECK-NEXT: RET 0 -entry: - ret void -} diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll index 8ea3e4f9d739..00aa7cf84e55 100644 --- a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll +++ b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=i386-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32 -; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64 +; RUN: llc -mtriple=i386-linux-gnu -mattr=+sse2 -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X32 +; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64 @a1_8bit = external global i8 @a7_8bit = external global i8 @@ -11,8 +11,8 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, ; ALL-LABEL: name: test_i8_args_8 ; X64: fixedStack: -; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 1, alignment: 8, isImmutable: true, isAliased: false -; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false +; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 1, alignment: 8, isImmutable: true, +; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true, ; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d ; X64: [[ARG1:%[0-9]+]](s8) = COPY %edi ; X64-NEXT: %{{[0-9]+}}(s8) = COPY %esi @@ -26,14 +26,14 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, ; X64-NEXT: [[ARG8:%[0-9]+]](s8) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0) ; X32: fixedStack: -; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 1, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 1, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 1, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 1, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 1, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 1, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 1, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 1, alignment: 4, isImmutable: true, +; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 1, alignment: 8, isImmutable: true, +; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 1, alignment: 4, isImmutable: true, +; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 1, alignment: 16, isImmutable: true, +; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 1, alignment: 4, isImmutable: true, +; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 1, alignment: 8, isImmutable: true, +; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 1, alignment: 4, isImmutable: true, +; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, isImmutable: true, ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]](s8) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] @@ -77,8 +77,8 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, ; ALL-LABEL: name: test_i32_args_8 ; X64: fixedStack: -; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false -; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false +; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true, +; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true, ; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d ; X64: [[ARG1:%[0-9]+]](s32) = COPY %edi ; X64-NEXT: %{{[0-9]+}}(s32) = COPY %esi @@ -92,14 +92,14 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, ; X64-NEXT: [[ARG8:%[0-9]+]](s32) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0) ; X32: fixedStack: -; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 4, alignment: 8, isImmutable: true, +; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 4, alignment: 16, isImmutable: true, +; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true, +; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true, ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] @@ -142,8 +142,8 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, ; ALL-LABEL: name: test_i64_args_8 ; X64: fixedStack: -; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false -; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false +; X64: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 8, alignment: 8, isImmutable: true, +; X64: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, alignment: 16, isImmutable: true, ; X64: liveins: %rcx, %rdi, %rdx, %rsi, %r8, %r9 ; X64: [[ARG1:%[0-9]+]](s64) = COPY %rdi ; X64-NEXT: %{{[0-9]+}}(s64) = COPY %rsi @@ -157,22 +157,22 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, ; X64-NEXT: [[ARG8:%[0-9]+]](s64) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0) ; X32: fixedStack: -; X32: id: [[STACK60:[0-9]+]], offset: 60, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK56:[0-9]+]], offset: 56, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK52:[0-9]+]], offset: 52, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK48:[0-9]+]], offset: 48, size: 4, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK44:[0-9]+]], offset: 44, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK40:[0-9]+]], offset: 40, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK36:[0-9]+]], offset: 36, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK32:[0-9]+]], offset: 32, size: 4, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK60:[0-9]+]], type: default, offset: 60, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK56:[0-9]+]], type: default, offset: 56, size: 4, alignment: 8, isImmutable: true, +; X32: id: [[STACK52:[0-9]+]], type: default, offset: 52, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK48:[0-9]+]], type: default, offset: 48, size: 4, alignment: 16, isImmutable: true, +; X32: id: [[STACK44:[0-9]+]], type: default, offset: 44, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK40:[0-9]+]], type: default, offset: 40, size: 4, alignment: 8, isImmutable: true, +; X32: id: [[STACK36:[0-9]+]], type: default, offset: 36, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK32:[0-9]+]], type: default, offset: 32, size: 4, alignment: 16, isImmutable: true, +; X32: id: [[STACK28:[0-9]+]], type: default, offset: 28, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK24:[0-9]+]], type: default, offset: 24, size: 4, alignment: 8, isImmutable: true, +; X32: id: [[STACK20:[0-9]+]], type: default, offset: 20, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 4, alignment: 16, isImmutable: true, +; X32: id: [[STACK12:[0-9]+]], type: default, offset: 12, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 4, alignment: 8, isImmutable: true, +; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true, ; X32: [[ARG1L_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1L:%[0-9]+]](s32) = G_LOAD [[ARG1L_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) @@ -249,8 +249,8 @@ define float @test_float_args(float %arg1, float %arg2) { ; X64-NEXT: RET 0, implicit %xmm0 ; X32: fixedStack: -; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK4:[0-9]+]], type: default, offset: 4, size: 4, alignment: 4, isImmutable: true, +; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true, ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] @@ -270,8 +270,8 @@ define double @test_double_args(double %arg1, double %arg2) { ; X64-NEXT: RET 0, implicit %xmm0 ; X32: fixedStack: -; X32: id: [[STACK4:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK4:[0-9]+]], type: default, offset: 8, size: 8, alignment: 8, isImmutable: true, +; X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, alignment: 16, isImmutable: true, ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]](s64) = G_LOAD [[ARG1_ADDR:%[0-9]+]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0) ; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] @@ -282,6 +282,38 @@ define double @test_double_args(double %arg1, double %arg2) { ret double %arg2 } +define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) { +; ALL: name: test_v4i32_args +; ALL: liveins: %xmm0, %xmm1 +; ALL: [[ARG1:%[0-9]+]](<4 x s32>) = COPY %xmm0 +; ALL-NEXT: [[ARG2:%[0-9]+]](<4 x s32>) = COPY %xmm1 +; ALL-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](<4 x s32>) +; ALL-NEXT: RET 0, implicit %xmm0 + ret <4 x i32> %arg2 +} + +define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) { +; ALL: name: test_v8i32_args +; ALL: liveins: %xmm0, %xmm1 +; ALL: [[ARG1L:%[0-9]+]](<4 x s32>) = COPY %xmm0 +; ALL-NEXT: [[ARG1H:%[0-9]+]](<4 x s32>) = COPY %xmm1 +; ALL-NEXT: [[ARG1:%[0-9]+]](<8 x s32>) = G_MERGE_VALUES [[ARG1L]](<4 x s32>), [[ARG1H]](<4 x s32>) +; ALL-NEXT: [[RETL:%[0-9]+]](<4 x s32>), [[RETH:%[0-9]+]](<4 x s32>) = G_UNMERGE_VALUES [[ARG1:%[0-9]+]](<8 x s32>) +; ALL-NEXT: %xmm0 = COPY [[RETL:%[0-9]+]](<4 x s32>) +; ALL-NEXT: %xmm1 = COPY [[RETH:%[0-9]+]](<4 x s32>) +; ALL-NEXT: RET 0, implicit %xmm0, implicit %xmm1 + + ret <8 x i32> %arg1 +} + +define void @test_void_return() { +; ALL-LABEL: name: test_void_return +; ALL: bb.1.entry: +; ALL-NEXT: RET 0 +entry: + ret void +} + define i32 * @test_memop_i32(i32 * %p1) { ; ALL-LABEL:name: test_memop_i32 ;X64 liveins: %rdi @@ -290,7 +322,7 @@ define i32 * @test_memop_i32(i32 * %p1) { ;X64-NEXT: RET 0, implicit %rax ;X32: fixedStack: -;X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false } +;X32: id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 4, alignment: 16, isImmutable: true, ;X32: %1(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ;X32-NEXT: %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) ;X32-NEXT: %eax = COPY %0(p0) diff --git a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll b/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll deleted file mode 100644 index 90a05f5fc225..000000000000 --- a/test/CodeGen/X86/GlobalISel/irtranslator-callingconv_64bit.ll +++ /dev/null @@ -1,25 +0,0 @@ -; RUN: llc -mtriple=x86_64-linux-gnu -global-isel -stop-after=irtranslator < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=X64 - -define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) { -; X64: name: test_v4i32_args -; X64: liveins: %xmm0, %xmm1 -; X64: [[ARG1:%[0-9]+]](<4 x s32>) = COPY %xmm0 -; X64-NEXT: [[ARG2:%[0-9]+]](<4 x s32>) = COPY %xmm1 -; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](<4 x s32>) -; X64-NEXT: RET 0, implicit %xmm0 - ret <4 x i32> %arg2 -} - -define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) { -; X64: name: test_v8i32_args -; X64: liveins: %xmm0, %xmm1 -; X64: [[ARG1L:%[0-9]+]](<4 x s32>) = COPY %xmm0 -; X64-NEXT: [[ARG1H:%[0-9]+]](<4 x s32>) = COPY %xmm1 -; X64-NEXT: [[ARG1:%[0-9]+]](<8 x s32>) = G_MERGE_VALUES [[ARG1L]](<4 x s32>), [[ARG1H]](<4 x s32>) -; X64-NEXT: [[RETL:%[0-9]+]](<4 x s32>), [[RETH:%[0-9]+]](<4 x s32>) = G_UNMERGE_VALUES [[ARG1:%[0-9]+]](<8 x s32>) -; X64-NEXT: %xmm0 = COPY [[RETL:%[0-9]+]](<4 x s32>) -; X64-NEXT: %xmm1 = COPY [[RETH:%[0-9]+]](<4 x s32>) -; X64-NEXT: RET 0, implicit %xmm0, implicit %xmm1 - - ret <8 x i32> %arg1 -} diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir index 0d66a6384107..682d01e66fa0 100644 --- a/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir +++ b/test/CodeGen/X86/GlobalISel/legalize-mul-scalar.mir @@ -24,9 +24,9 @@ alignment: 4 legalized: false regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: _ } -# CHECK-NEXT: - { id: 1, class: _ } -# CHECK-NEXT: - { id: 2, class: _ } +# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -56,9 +56,9 @@ alignment: 4 legalized: false regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: _ } -# CHECK-NEXT: - { id: 1, class: _ } -# CHECK-NEXT: - { id: 2, class: _ } +# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -88,9 +88,9 @@ alignment: 4 legalized: false regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: _ } -# CHECK-NEXT: - { id: 1, class: _ } -# CHECK-NEXT: - { id: 2, class: _ } +# CHECK-NEXT: - { id: 0, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: _, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir index be62832b008a..effd26e9866d 100644 --- a/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir +++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v128.mir @@ -26,9 +26,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -56,9 +56,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -86,9 +86,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir index d99303c3ba3b..5ae8132156d5 100644 --- a/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir +++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v256.mir @@ -26,9 +26,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -56,9 +56,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -86,9 +86,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir b/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir index 24eefd30c2ac..71ea313c4c72 100644 --- a/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir +++ b/test/CodeGen/X86/GlobalISel/legalize-mul-v512.mir @@ -28,9 +28,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -58,9 +58,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -88,9 +88,9 @@ alignment: 4 legalized: false regBankSelected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: _ } -# ALL-NEXT: - { id: 1, class: _ } -# ALL-NEXT: - { id: 2, class: _ } +# ALL-NEXT: - { id: 0, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: _, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: _, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir b/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir index cc03f3a57f0b..ca238b29c2dd 100644 --- a/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir +++ b/test/CodeGen/X86/GlobalISel/regbankselect-AVX2.mir @@ -33,8 +33,8 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_mul_vec256 # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -56,8 +56,8 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_vec256 # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -79,8 +79,8 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_sub_vec256 # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -100,8 +100,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: vecr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -122,8 +122,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vecr } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir b/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir index 278413ad38ef..c94ecc8e9a8d 100644 --- a/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir +++ b/test/CodeGen/X86/GlobalISel/regbankselect-AVX512.mir @@ -33,8 +33,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vecr } -# CHECK-NEXT: - { id: 1, class: vecr } +# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -53,8 +53,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vecr } -# CHECK-NEXT: - { id: 1, class: vecr } +# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -73,8 +73,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vecr } -# CHECK-NEXT: - { id: 1, class: vecr } +# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -93,8 +93,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: vecr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -115,8 +115,8 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vecr } -# CHECK-NEXT: - { id: 1, class: gpr } +# CHECK-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir index a115d1fa3255..b74e03f0fe79 100644 --- a/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir +++ b/test/CodeGen/X86/GlobalISel/regbankselect-X32.mir @@ -14,11 +14,11 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } -# CHECK-NEXT: - { id: 4, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir index 1ea922ee475a..7bcc57aef4ac 100644 --- a/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir +++ b/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir @@ -145,9 +145,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_i8 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } -# CHECK: - { id: 2, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } +# CHECK: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -172,9 +172,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_i16 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } -# CHECK: - { id: 2, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } +# CHECK: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -199,9 +199,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_i32 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } -# CHECK: - { id: 2, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } +# CHECK: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -226,9 +226,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_i64 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } -# CHECK: - { id: 2, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } +# CHECK: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -253,14 +253,14 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_mul_gpr # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } -# CHECK: - { id: 2, class: gpr } -# CHECK: - { id: 3, class: gpr } -# CHECK: - { id: 4, class: gpr } -# CHECK: - { id: 5, class: gpr } -# CHECK: - { id: 6, class: gpr } -# CHECK: - { id: 7, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } +# CHECK: - { id: 2, class: gpr, preferred-register: '' } +# CHECK: - { id: 3, class: gpr, preferred-register: '' } +# CHECK: - { id: 4, class: gpr, preferred-register: '' } +# CHECK: - { id: 5, class: gpr, preferred-register: '' } +# CHECK: - { id: 6, class: gpr, preferred-register: '' } +# CHECK: - { id: 7, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -292,9 +292,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_float # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } -# CHECK: - { id: 2, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } +# CHECK: - { id: 2, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -319,9 +319,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_double # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } -# CHECK: - { id: 2, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } +# CHECK: - { id: 2, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -346,9 +346,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_v4i32 # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } -# CHECK: - { id: 2, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } +# CHECK: - { id: 2, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -373,9 +373,9 @@ selected: false tracksRegLiveness: true # CHECK-LABEL: name: test_add_v4f32 # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: vecr } -# CHECK: - { id: 2, class: vecr } +# CHECK: - { id: 0, class: vecr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } +# CHECK: - { id: 2, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -399,8 +399,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_i8 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -422,8 +422,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_i16 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -445,8 +445,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_i32 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -469,8 +469,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_i64 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -492,8 +492,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_float # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -515,8 +515,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_double # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -538,8 +538,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_load_v4i32 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: vecr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: vecr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -561,8 +561,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_store_i32 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -585,8 +585,8 @@ regBankSelected: false selected: false # CHECK-LABEL: name: test_store_i64 # CHECK: registers: -# CHECK: - { id: 0, class: gpr } -# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 0, class: gpr, preferred-register: '' } +# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -610,12 +610,12 @@ selected: false # CHECK-LABEL: name: test_store_float # CHECK: registers: -# FAST-NEXT: - { id: 0, class: vecr } -# FAST-NEXT: - { id: 1, class: gpr } -# FAST-NEXT: - { id: 2, class: gpr } +# FAST-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# FAST-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# GREEDY-NEXT: - { id: 0, class: vecr } -# GREEDY-NEXT: - { id: 1, class: gpr } +# GREEDY-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } @@ -647,12 +647,12 @@ selected: false # CHECK-LABEL: name: test_store_double # CHECK: registers: -# FAST-NEXT: - { id: 0, class: vecr } -# FAST-NEXT: - { id: 1, class: gpr } -# FAST-NEXT: - { id: 2, class: gpr } +# FAST-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# FAST-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# GREEDY-NEXT: - { id: 0, class: vecr } -# GREEDY-NEXT: - { id: 1, class: gpr } +# GREEDY-NEXT: - { id: 0, class: vecr, preferred-register: '' } +# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } @@ -682,10 +682,10 @@ alignment: 4 legalized: true # CHECK-LABEL: name: constInt_check # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -706,10 +706,10 @@ alignment: 4 legalized: true # CHECK-LABEL: name: trunc_check # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -729,11 +729,11 @@ name: test_gep legalized: true # CHECK-LABEL: name: test_gep # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } -# CHECK-NEXT: - { id: 3, class: gpr } -# CHECK-NEXT: - { id: 4, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -757,9 +757,9 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -782,9 +782,9 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -807,9 +807,9 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -832,9 +832,9 @@ alignment: 4 legalized: true regBankSelected: false # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr } -# CHECK-NEXT: - { id: 1, class: gpr } -# CHECK-NEXT: - { id: 2, class: gpr } +# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } diff --git a/test/CodeGen/X86/GlobalISel/select-add-v128.mir b/test/CodeGen/X86/GlobalISel/select-add-v128.mir index a39702340bc2..4f7b6ec72d52 100644 --- a/test/CodeGen/X86/GlobalISel/select-add-v128.mir +++ b/test/CodeGen/X86/GlobalISel/select-add-v128.mir @@ -32,19 +32,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128 } -# AVX512VL-NEXT: - { id: 1, class: vr128 } -# AVX512VL-NEXT: - { id: 2, class: vr128 } +# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -74,19 +74,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128 } -# AVX512VL-NEXT: - { id: 1, class: vr128 } -# AVX512VL-NEXT: - { id: 2, class: vr128 } +# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -116,19 +116,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -158,19 +158,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-add-v256.mir b/test/CodeGen/X86/GlobalISel/select-add-v256.mir index 7556c2104124..143fd9422974 100644 --- a/test/CodeGen/X86/GlobalISel/select-add-v256.mir +++ b/test/CodeGen/X86/GlobalISel/select-add-v256.mir @@ -30,19 +30,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256 } -# AVX512VL-NEXT: - { id: 1, class: vr256 } -# AVX512VL-NEXT: - { id: 2, class: vr256 } +# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -70,19 +70,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256 } -# AVX512VL-NEXT: - { id: 1, class: vr256 } -# AVX512VL-NEXT: - { id: 2, class: vr256 } +# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -110,19 +110,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256x } -# AVX512VL-NEXT: - { id: 1, class: vr256x } -# AVX512VL-NEXT: - { id: 2, class: vr256x } +# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -150,19 +150,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256x } -# AVX512VL-NEXT: - { id: 1, class: vr256x } -# AVX512VL-NEXT: - { id: 2, class: vr256x } +# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-add-v512.mir b/test/CodeGen/X86/GlobalISel/select-add-v512.mir index e90be4e996f8..6a0cd32eefd5 100644 --- a/test/CodeGen/X86/GlobalISel/select-add-v512.mir +++ b/test/CodeGen/X86/GlobalISel/select-add-v512.mir @@ -31,9 +31,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -57,9 +57,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -83,9 +83,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -109,9 +109,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-add-x32.mir b/test/CodeGen/X86/GlobalISel/select-add-x32.mir index 8710aaa61a21..0b864f417367 100644 --- a/test/CodeGen/X86/GlobalISel/select-add-x32.mir +++ b/test/CodeGen/X86/GlobalISel/select-add-x32.mir @@ -13,16 +13,16 @@ alignment: 4 legalized: true regBankSelected: true # X32: registers: -# X32-NEXT: - { id: 0, class: gr32 } -# X32-NEXT: - { id: 1, class: gr32 } -# X32-NEXT: - { id: 2, class: gr32 } -# X32-NEXT: - { id: 3, class: gr32 } -# X32-NEXT: - { id: 4, class: gpr } -# X32-NEXT: - { id: 5, class: gr32 } -# X32-NEXT: - { id: 6, class: gr32 } -# X32-NEXT: - { id: 7, class: gr32 } -# X32-NEXT: - { id: 8, class: gr32 } -# X32-NEXT: - { id: 9, class: gpr } +# X32-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 2, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 4, class: gpr, preferred-register: '' } +# X32-NEXT: - { id: 5, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 6, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 7, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 8, class: gr32, preferred-register: '' } +# X32-NEXT: - { id: 9, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-add.mir b/test/CodeGen/X86/GlobalISel/select-add.mir index 7337ce12c395..78e6bb6913a4 100644 --- a/test/CodeGen/X86/GlobalISel/select-add.mir +++ b/test/CodeGen/X86/GlobalISel/select-add.mir @@ -51,9 +51,9 @@ name: test_add_i64 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr64 } -# ALL-NEXT: - { id: 1, class: gr64 } -# ALL-NEXT: - { id: 2, class: gr64 } +# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -78,9 +78,9 @@ name: test_add_i32 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -106,9 +106,9 @@ legalized: true regBankSelected: true selected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr16 } -# ALL-NEXT: - { id: 1, class: gr16 } -# ALL-NEXT: - { id: 2, class: gr16 } +# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -135,9 +135,9 @@ legalized: true regBankSelected: true selected: false # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr8 } -# ALL-NEXT: - { id: 2, class: gr8 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -165,12 +165,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512F-NEXT: - { id: 0, class: fr32 } -# NO_AVX512F-NEXT: - { id: 1, class: fr32 } -# NO_AVX512F-NEXT: - { id: 2, class: fr32 } -# AVX512ALL-NEXT: - { id: 0, class: fr32x } -# AVX512ALL-NEXT: - { id: 1, class: fr32x } -# AVX512ALL-NEXT: - { id: 2, class: fr32x } +# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -200,12 +200,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512F-NEXT: - { id: 0, class: fr64 } -# NO_AVX512F-NEXT: - { id: 1, class: fr64 } -# NO_AVX512F-NEXT: - { id: 2, class: fr64 } -# AVX512ALL-NEXT: - { id: 0, class: fr64x } -# AVX512ALL-NEXT: - { id: 1, class: fr64x } -# AVX512ALL-NEXT: - { id: 2, class: fr64x } +# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -235,12 +235,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512VL-NEXT: - { id: 0, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 1, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 2, class: vr128 } -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -271,12 +271,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512VL-NEXT: - { id: 0, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 1, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 2, class: vr128 } -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-cmp.mir b/test/CodeGen/X86/GlobalISel/select-cmp.mir index a92c388c1db9..64c8cb6b823a 100644 --- a/test/CodeGen/X86/GlobalISel/select-cmp.mir +++ b/test/CodeGen/X86/GlobalISel/select-cmp.mir @@ -87,11 +87,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr8 } -# CHECK-NEXT: - { id: 1, class: gr8 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -124,11 +124,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr16 } -# CHECK-NEXT: - { id: 1, class: gr16 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr16, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -161,11 +161,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } -# CHECK-NEXT: - { id: 1, class: gr64 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr64, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -198,11 +198,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -235,11 +235,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -272,11 +272,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -309,11 +309,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -346,11 +346,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -383,11 +383,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -420,11 +420,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -457,11 +457,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -494,11 +494,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -531,11 +531,11 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr8 } -# CHECK-NEXT: - { id: 3, class: gr32 } -# CHECK-NEXT: - { id: 4, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr8, preferred-register: '' } +# CHECK-NEXT: - { id: 3, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 4, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-constant.mir b/test/CodeGen/X86/GlobalISel/select-constant.mir index 162de0264435..7902a5084ce6 100644 --- a/test/CodeGen/X86/GlobalISel/select-constant.mir +++ b/test/CodeGen/X86/GlobalISel/select-constant.mir @@ -33,7 +33,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i8 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr8 } +# CHECK-NEXT: - { id: 0, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -52,7 +52,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i16 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr16 } +# CHECK-NEXT: - { id: 0, class: gr16, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -71,7 +71,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i32 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -90,7 +90,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i64 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -110,7 +110,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i64_u32 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -129,7 +129,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i64_i32 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: diff --git a/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir b/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir index d1a3abfd0f93..edb467b2bf90 100644 --- a/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir +++ b/test/CodeGen/X86/GlobalISel/select-ext-x86-64.mir @@ -25,10 +25,10 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr8 } -# ALL-NEXT: - { id: 2, class: gr64 } -# ALL-NEXT: - { id: 3, class: gr64 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 3, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -57,8 +57,8 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr64 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -83,8 +83,8 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr16 } -# ALL-NEXT: - { id: 1, class: gr64 } +# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-ext.mir b/test/CodeGen/X86/GlobalISel/select-ext.mir index dccc20e57100..b52f1f6fa621 100644 --- a/test/CodeGen/X86/GlobalISel/select-ext.mir +++ b/test/CodeGen/X86/GlobalISel/select-ext.mir @@ -35,9 +35,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -63,8 +63,8 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -89,8 +89,8 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr16 } -# ALL-NEXT: - { id: 1, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -115,8 +115,8 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -141,8 +141,8 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr16 } -# ALL-NEXT: - { id: 1, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-gep.mir b/test/CodeGen/X86/GlobalISel/select-gep.mir index c8a4dc80cb2c..61c766230035 100644 --- a/test/CodeGen/X86/GlobalISel/select-gep.mir +++ b/test/CodeGen/X86/GlobalISel/select-gep.mir @@ -14,9 +14,9 @@ regBankSelected: true selected: false # CHECK-LABEL: name: test_gep_i32 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } -# CHECK-NEXT: - { id: 1, class: gr64_nosp } -# CHECK-NEXT: - { id: 2, class: gr64 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr64_nosp, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-inc.mir b/test/CodeGen/X86/GlobalISel/select-inc.mir index 7a77864091d3..47fe6ef672ba 100644 --- a/test/CodeGen/X86/GlobalISel/select-inc.mir +++ b/test/CodeGen/X86/GlobalISel/select-inc.mir @@ -13,10 +13,10 @@ name: test_add_i8 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# INC-NEXT: - { id: 1, class: gpr } -# ADD-NEXT: - { id: 1, class: gr8 } -# ALL-NEXT: - { id: 2, class: gr8 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# INC-NEXT: - { id: 1, class: gpr, preferred-register: '' } +# ADD-NEXT: - { id: 1, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir b/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir index 539520c0b8f5..9128f19b1d24 100644 --- a/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir +++ b/test/CodeGen/X86/GlobalISel/select-leaf-constant.mir @@ -29,7 +29,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i32_1 # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -47,7 +47,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i32_1_optsize # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -65,7 +65,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i32_1b # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: @@ -83,7 +83,7 @@ regBankSelected: true selected: false # CHECK-LABEL: name: const_i32_1_optsizeb # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } # CHECK: body: diff --git a/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir b/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir index 8e6a2771db6e..09f414b48a8a 100644 --- a/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir +++ b/test/CodeGen/X86/GlobalISel/select-memop-scalar-x32.mir @@ -49,9 +49,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr8 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -79,9 +79,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr16 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -109,9 +109,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -139,10 +139,10 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr8 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } -# ALL-NEXT: - { id: 3, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr8, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -176,10 +176,10 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr16 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } -# ALL-NEXT: - { id: 3, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -213,10 +213,10 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } -# ALL-NEXT: - { id: 3, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -250,9 +250,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -280,10 +280,10 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } -# ALL-NEXT: - { id: 3, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 3, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir b/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir index b57c9b0cca98..6d03d7525d20 100644 --- a/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir +++ b/test/CodeGen/X86/GlobalISel/select-memop-scalar.mir @@ -91,8 +91,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr8 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr8, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -115,8 +115,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr16 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr16, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -139,8 +139,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr32 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr32, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -163,8 +163,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -187,8 +187,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr32 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr32, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -211,9 +211,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# NO_AVX512F: - { id: 1, class: fr32 } -# AVX512ALL: - { id: 1, class: fr32x } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# NO_AVX512F: - { id: 1, class: fr32, preferred-register: '' } +# AVX512ALL: - { id: 1, class: fr32x, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: vecr } # ALL: %0 = COPY %rdi @@ -238,8 +238,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -262,9 +262,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# NO_AVX512F: - { id: 1, class: fr64 } -# AVX512ALL: - { id: 1, class: fr64x } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# NO_AVX512F: - { id: 1, class: fr64, preferred-register: '' } +# AVX512ALL: - { id: 1, class: fr64x, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: vecr } # ALL: %0 = COPY %rdi @@ -289,8 +289,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr32 } -# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 0, class: gr32, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %edi @@ -315,8 +315,8 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %0 = COPY %rdi @@ -341,9 +341,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: fr32x } -# ALL: - { id: 1, class: gr64 } -# ALL: - { id: 2, class: gr32 } +# ALL: - { id: 0, class: fr32x, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } +# ALL: - { id: 2, class: gr32, preferred-register: '' } - { id: 0, class: vecr } - { id: 1, class: gpr } - { id: 2, class: gpr } @@ -371,9 +371,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# NO_AVX512F: - { id: 0, class: fr32 } -# AVX512ALL: - { id: 0, class: fr32x } -# ALL: - { id: 1, class: gr64 } +# NO_AVX512F: - { id: 0, class: fr32, preferred-register: '' } +# AVX512ALL: - { id: 0, class: fr32x, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: vecr } - { id: 1, class: gpr } # ALL: %0 = COPY %xmm0 @@ -400,9 +400,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: fr64x } -# ALL: - { id: 1, class: gr64 } -# ALL: - { id: 2, class: gr64 } +# ALL: - { id: 0, class: fr64x, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } +# ALL: - { id: 2, class: gr64, preferred-register: '' } - { id: 0, class: vecr } - { id: 1, class: gpr } - { id: 2, class: gpr } @@ -430,9 +430,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# NO_AVX512F: - { id: 0, class: fr64 } -# AVX512ALL: - { id: 0, class: fr64x } -# ALL: - { id: 1, class: gr64 } +# NO_AVX512F: - { id: 0, class: fr64, preferred-register: '' } +# AVX512ALL: - { id: 0, class: fr64x, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: vecr } - { id: 1, class: gpr } # ALL: %0 = COPY %xmm0 @@ -460,8 +460,8 @@ legalized: true regBankSelected: true selected: false registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: %1 = MOV64rm %0, 1, _, 0, _ :: (load 8 from %ir.ptr1) @@ -483,8 +483,8 @@ legalized: true regBankSelected: true selected: false registers: -# ALL: - { id: 0, class: gr64 } -# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: gpr } # ALL: MOV64mr %0, 1, _, 0, _, %1 :: (store 8 into %ir.ptr1) diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v128.mir b/test/CodeGen/X86/GlobalISel/select-memop-v128.mir index ce3f6b91dcf6..08844657e2a2 100644 --- a/test/CodeGen/X86/GlobalISel/select-memop-v128.mir +++ b/test/CodeGen/X86/GlobalISel/select-memop-v128.mir @@ -32,9 +32,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# NO_AVX512F: - { id: 1, class: vr128 } -# AVX512ALL: - { id: 1, class: vr128x } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# NO_AVX512F: - { id: 1, class: vr128, preferred-register: '' } +# AVX512ALL: - { id: 1, class: vr128x, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: vecr } # ALL: %0 = COPY %rdi @@ -60,9 +60,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# ALL: - { id: 0, class: gr64 } -# NO_AVX512F: - { id: 1, class: vr128 } -# AVX512ALL: - { id: 1, class: vr128x } +# ALL: - { id: 0, class: gr64, preferred-register: '' } +# NO_AVX512F: - { id: 1, class: vr128, preferred-register: '' } +# AVX512ALL: - { id: 1, class: vr128x, preferred-register: '' } - { id: 0, class: gpr } - { id: 1, class: vecr } # ALL: %0 = COPY %rdi @@ -88,9 +88,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# NO_AVX512F: - { id: 0, class: vr128 } -# AVX512ALL: - { id: 0, class: vr128x } -# ALL: - { id: 1, class: gr64 } +# NO_AVX512F: - { id: 0, class: vr128, preferred-register: '' } +# AVX512ALL: - { id: 0, class: vr128x, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: vecr } - { id: 1, class: gpr } # ALL: %0 = COPY %xmm0 @@ -118,9 +118,9 @@ alignment: 4 legalized: true regBankSelected: true registers: -# NO_AVX512F: - { id: 0, class: vr128 } -# AVX512ALL: - { id: 0, class: vr128x } -# ALL: - { id: 1, class: gr64 } +# NO_AVX512F: - { id: 0, class: vr128, preferred-register: '' } +# AVX512ALL: - { id: 0, class: vr128x, preferred-register: '' } +# ALL: - { id: 1, class: gr64, preferred-register: '' } - { id: 0, class: vecr } - { id: 1, class: gpr } # ALL: %0 = COPY %xmm0 diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v256.mir b/test/CodeGen/X86/GlobalISel/select-memop-v256.mir index b9a7e4a8cc4a..ff371ad9989f 100644 --- a/test/CodeGen/X86/GlobalISel/select-memop-v256.mir +++ b/test/CodeGen/X86/GlobalISel/select-memop-v256.mir @@ -33,12 +33,12 @@ alignment: 4 legalized: true regBankSelected: true # NO_AVX512F: registers: -# NO_AVX512F-NEXT: - { id: 0, class: gr64 } -# NO_AVX512F-NEXT: - { id: 1, class: vr256 } +# NO_AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: vr256, preferred-register: '' } # # AVX512ALL: registers: -# AVX512ALL-NEXT: - { id: 0, class: gr64 } -# AVX512ALL-NEXT: - { id: 1, class: vr256x } +# AVX512ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: vecr } @@ -73,12 +73,12 @@ alignment: 4 legalized: true regBankSelected: true # NO_AVX512F: registers: -# NO_AVX512F-NEXT: - { id: 0, class: gr64 } -# NO_AVX512F-NEXT: - { id: 1, class: vr256 } +# NO_AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: vr256, preferred-register: '' } # # AVX512ALL: registers: -# AVX512ALL-NEXT: - { id: 0, class: gr64 } -# AVX512ALL-NEXT: - { id: 1, class: vr256x } +# AVX512ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: vecr } @@ -113,12 +113,12 @@ alignment: 4 legalized: true regBankSelected: true # NO_AVX512F: registers: -# NO_AVX512F-NEXT: - { id: 0, class: vr256 } -# NO_AVX512F-NEXT: - { id: 1, class: gr64 } +# NO_AVX512F-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' } # # AVX512ALL: registers: -# AVX512ALL-NEXT: - { id: 0, class: vr256x } -# AVX512ALL-NEXT: - { id: 1, class: gr64 } +# AVX512ALL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: gpr } @@ -153,12 +153,12 @@ alignment: 4 legalized: true regBankSelected: true # NO_AVX512F: registers: -# NO_AVX512F-NEXT: - { id: 0, class: vr256 } -# NO_AVX512F-NEXT: - { id: 1, class: gr64 } +# NO_AVX512F-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' } # # AVX512ALL: registers: -# AVX512ALL-NEXT: - { id: 0, class: vr256x } -# AVX512ALL-NEXT: - { id: 1, class: gr64 } +# AVX512ALL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-memop-v512.mir b/test/CodeGen/X86/GlobalISel/select-memop-v512.mir index 87978a684d4c..131902d81a00 100644 --- a/test/CodeGen/X86/GlobalISel/select-memop-v512.mir +++ b/test/CodeGen/X86/GlobalISel/select-memop-v512.mir @@ -28,8 +28,8 @@ alignment: 4 legalized: true regBankSelected: true # AVX512F: registers: -# AVX512F-NEXT: - { id: 0, class: gr64 } -# AVX512F-NEXT: - { id: 1, class: vr512 } +# AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# AVX512F-NEXT: - { id: 1, class: vr512, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: vecr } @@ -54,8 +54,8 @@ alignment: 4 legalized: true regBankSelected: true # AVX512F: registers: -# AVX512F-NEXT: - { id: 0, class: gr64 } -# AVX512F-NEXT: - { id: 1, class: vr512 } +# AVX512F-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# AVX512F-NEXT: - { id: 1, class: vr512, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: vecr } @@ -80,8 +80,8 @@ alignment: 4 legalized: true regBankSelected: true # AVX512F: registers: -# AVX512F-NEXT: - { id: 0, class: vr512 } -# AVX512F-NEXT: - { id: 1, class: gr64 } +# AVX512F-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: gpr } @@ -106,8 +106,8 @@ alignment: 4 legalized: true regBankSelected: true # AVX512F: registers: -# AVX512F-NEXT: - { id: 0, class: vr512 } -# AVX512F-NEXT: - { id: 1, class: gr64 } +# AVX512F-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# AVX512F-NEXT: - { id: 1, class: gr64, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir b/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir index 34a77acc2d1e..453557c08469 100644 --- a/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir +++ b/test/CodeGen/X86/GlobalISel/select-mul-scalar.mir @@ -24,9 +24,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr16 } -# ALL-NEXT: - { id: 1, class: gr16 } -# ALL-NEXT: - { id: 2, class: gr16 } +# ALL-NEXT: - { id: 0, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr16, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -55,9 +55,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -86,9 +86,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr64 } -# ALL-NEXT: - { id: 1, class: gr64 } -# ALL-NEXT: - { id: 2, class: gr64 } +# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/GlobalISel/select-mul-vec.mir b/test/CodeGen/X86/GlobalISel/select-mul-vec.mir index 5f8ab1e4f189..d3651ccd1ab9 100644 --- a/test/CodeGen/X86/GlobalISel/select-mul-vec.mir +++ b/test/CodeGen/X86/GlobalISel/select-mul-vec.mir @@ -95,9 +95,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128 } -# CHECK-NEXT: - { id: 1, class: vr128 } -# CHECK-NEXT: - { id: 2, class: vr128 } +# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -121,9 +121,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128 } -# CHECK-NEXT: - { id: 1, class: vr128 } -# CHECK-NEXT: - { id: 2, class: vr128 } +# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -147,9 +147,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128x } -# CHECK-NEXT: - { id: 1, class: vr128x } -# CHECK-NEXT: - { id: 2, class: vr128x } +# CHECK-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -173,9 +173,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128 } -# CHECK-NEXT: - { id: 1, class: vr128 } -# CHECK-NEXT: - { id: 2, class: vr128 } +# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -199,9 +199,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128 } -# CHECK-NEXT: - { id: 1, class: vr128 } -# CHECK-NEXT: - { id: 2, class: vr128 } +# CHECK-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -225,9 +225,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128x } -# CHECK-NEXT: - { id: 1, class: vr128x } -# CHECK-NEXT: - { id: 2, class: vr128x } +# CHECK-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -251,9 +251,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr128x } -# CHECK-NEXT: - { id: 1, class: vr128x } -# CHECK-NEXT: - { id: 2, class: vr128x } +# CHECK-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -277,9 +277,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr256 } -# CHECK-NEXT: - { id: 1, class: vr256 } -# CHECK-NEXT: - { id: 2, class: vr256 } +# CHECK-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr256, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -303,9 +303,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr256x } -# CHECK-NEXT: - { id: 1, class: vr256x } -# CHECK-NEXT: - { id: 2, class: vr256x } +# CHECK-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -329,9 +329,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr256 } -# CHECK-NEXT: - { id: 1, class: vr256 } -# CHECK-NEXT: - { id: 2, class: vr256 } +# CHECK-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr256, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -355,9 +355,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr256x } -# CHECK-NEXT: - { id: 1, class: vr256x } -# CHECK-NEXT: - { id: 2, class: vr256x } +# CHECK-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -381,9 +381,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr256x } -# CHECK-NEXT: - { id: 1, class: vr256x } -# CHECK-NEXT: - { id: 2, class: vr256x } +# CHECK-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -407,9 +407,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr512 } -# CHECK-NEXT: - { id: 1, class: vr512 } -# CHECK-NEXT: - { id: 2, class: vr512 } +# CHECK-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -433,9 +433,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr512 } -# CHECK-NEXT: - { id: 1, class: vr512 } -# CHECK-NEXT: - { id: 2, class: vr512 } +# CHECK-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -459,9 +459,9 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: vr512 } -# CHECK-NEXT: - { id: 1, class: vr512 } -# CHECK-NEXT: - { id: 2, class: vr512 } +# CHECK-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# CHECK-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v128.mir b/test/CodeGen/X86/GlobalISel/select-sub-v128.mir index d60d4155e29d..f77879d93009 100644 --- a/test/CodeGen/X86/GlobalISel/select-sub-v128.mir +++ b/test/CodeGen/X86/GlobalISel/select-sub-v128.mir @@ -32,19 +32,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128 } -# AVX512VL-NEXT: - { id: 1, class: vr128 } -# AVX512VL-NEXT: - { id: 2, class: vr128 } +# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -74,19 +74,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128 } -# AVX512VL-NEXT: - { id: 1, class: vr128 } -# AVX512VL-NEXT: - { id: 2, class: vr128 } +# AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -116,19 +116,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -158,19 +158,19 @@ alignment: 4 legalized: true regBankSelected: true # NOVL: registers: -# NOVL-NEXT: - { id: 0, class: vr128 } -# NOVL-NEXT: - { id: 1, class: vr128 } -# NOVL-NEXT: - { id: 2, class: vr128 } +# NOVL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NOVL-NEXT: - { id: 2, class: vr128, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr128x } -# AVX512BWVL-NEXT: - { id: 1, class: vr128x } -# AVX512BWVL-NEXT: - { id: 2, class: vr128x } +# AVX512BWVL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v256.mir b/test/CodeGen/X86/GlobalISel/select-sub-v256.mir index fbc44997b4a2..d6bde7fbb691 100644 --- a/test/CodeGen/X86/GlobalISel/select-sub-v256.mir +++ b/test/CodeGen/X86/GlobalISel/select-sub-v256.mir @@ -30,19 +30,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256 } -# AVX512VL-NEXT: - { id: 1, class: vr256 } -# AVX512VL-NEXT: - { id: 2, class: vr256 } +# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -70,19 +70,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256 } -# AVX512VL-NEXT: - { id: 1, class: vr256 } -# AVX512VL-NEXT: - { id: 2, class: vr256 } +# AVX512VL-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -110,19 +110,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256x } -# AVX512VL-NEXT: - { id: 1, class: vr256x } -# AVX512VL-NEXT: - { id: 2, class: vr256x } +# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -150,19 +150,19 @@ alignment: 4 legalized: true regBankSelected: true # AVX2: registers: -# AVX2-NEXT: - { id: 0, class: vr256 } -# AVX2-NEXT: - { id: 1, class: vr256 } -# AVX2-NEXT: - { id: 2, class: vr256 } +# AVX2-NEXT: - { id: 0, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 1, class: vr256, preferred-register: '' } +# AVX2-NEXT: - { id: 2, class: vr256, preferred-register: '' } # # AVX512VL: registers: -# AVX512VL-NEXT: - { id: 0, class: vr256x } -# AVX512VL-NEXT: - { id: 1, class: vr256x } -# AVX512VL-NEXT: - { id: 2, class: vr256x } +# AVX512VL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } # # AVX512BWVL: registers: -# AVX512BWVL-NEXT: - { id: 0, class: vr256x } -# AVX512BWVL-NEXT: - { id: 1, class: vr256x } -# AVX512BWVL-NEXT: - { id: 2, class: vr256x } +# AVX512BWVL-NEXT: - { id: 0, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 1, class: vr256x, preferred-register: '' } +# AVX512BWVL-NEXT: - { id: 2, class: vr256x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-sub-v512.mir b/test/CodeGen/X86/GlobalISel/select-sub-v512.mir index dcd05f056949..828a243b2656 100644 --- a/test/CodeGen/X86/GlobalISel/select-sub-v512.mir +++ b/test/CodeGen/X86/GlobalISel/select-sub-v512.mir @@ -31,9 +31,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -57,9 +57,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -83,9 +83,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -109,9 +109,9 @@ alignment: 4 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: vr512 } -# ALL-NEXT: - { id: 1, class: vr512 } -# ALL-NEXT: - { id: 2, class: vr512 } +# ALL-NEXT: - { id: 0, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: vr512, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: vr512, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-sub.mir b/test/CodeGen/X86/GlobalISel/select-sub.mir index d4db6eec6d80..4768a2d93222 100644 --- a/test/CodeGen/X86/GlobalISel/select-sub.mir +++ b/test/CodeGen/X86/GlobalISel/select-sub.mir @@ -40,9 +40,9 @@ name: test_sub_i64 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr64 } -# ALL-NEXT: - { id: 1, class: gr64 } -# ALL-NEXT: - { id: 2, class: gr64 } +# ALL-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr64, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -66,9 +66,9 @@ name: test_sub_i32 legalized: true regBankSelected: true # ALL: registers: -# ALL-NEXT: - { id: 0, class: gr32 } -# ALL-NEXT: - { id: 1, class: gr32 } -# ALL-NEXT: - { id: 2, class: gr32 } +# ALL-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 1, class: gr32, preferred-register: '' } +# ALL-NEXT: - { id: 2, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -94,12 +94,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512F-NEXT: - { id: 0, class: fr32 } -# NO_AVX512F-NEXT: - { id: 1, class: fr32 } -# NO_AVX512F-NEXT: - { id: 2, class: fr32 } -# AVX512ALL-NEXT: - { id: 0, class: fr32x } -# AVX512ALL-NEXT: - { id: 1, class: fr32x } -# AVX512ALL-NEXT: - { id: 2, class: fr32x } +# NO_AVX512F-NEXT: - { id: 0, class: fr32, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: fr32, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 2, class: fr32, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 0, class: fr32x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: fr32x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 2, class: fr32x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -128,12 +128,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512F-NEXT: - { id: 0, class: fr64 } -# NO_AVX512F-NEXT: - { id: 1, class: fr64 } -# NO_AVX512F-NEXT: - { id: 2, class: fr64 } -# AVX512ALL-NEXT: - { id: 0, class: fr64x } -# AVX512ALL-NEXT: - { id: 1, class: fr64x } -# AVX512ALL-NEXT: - { id: 2, class: fr64x } +# NO_AVX512F-NEXT: - { id: 0, class: fr64, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 1, class: fr64, preferred-register: '' } +# NO_AVX512F-NEXT: - { id: 2, class: fr64, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 0, class: fr64x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 1, class: fr64x, preferred-register: '' } +# AVX512ALL-NEXT: - { id: 2, class: fr64x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -161,12 +161,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512VL-NEXT: - { id: 0, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 1, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 2, class: vr128 } -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } @@ -196,12 +196,12 @@ regBankSelected: true selected: false tracksRegLiveness: true # ALL: registers: -# NO_AVX512VL-NEXT: - { id: 0, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 1, class: vr128 } -# NO_AVX512VL-NEXT: - { id: 2, class: vr128 } -# AVX512VL-NEXT: - { id: 0, class: vr128x } -# AVX512VL-NEXT: - { id: 1, class: vr128x } -# AVX512VL-NEXT: - { id: 2, class: vr128x } +# NO_AVX512VL-NEXT: - { id: 0, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 1, class: vr128, preferred-register: '' } +# NO_AVX512VL-NEXT: - { id: 2, class: vr128, preferred-register: '' } +# AVX512VL-NEXT: - { id: 0, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 1, class: vr128x, preferred-register: '' } +# AVX512VL-NEXT: - { id: 2, class: vr128x, preferred-register: '' } registers: - { id: 0, class: vecr } - { id: 1, class: vecr } diff --git a/test/CodeGen/X86/GlobalISel/select-trunc.mir b/test/CodeGen/X86/GlobalISel/select-trunc.mir index 9b90543d6559..4df585628ddc 100644 --- a/test/CodeGen/X86/GlobalISel/select-trunc.mir +++ b/test/CodeGen/X86/GlobalISel/select-trunc.mir @@ -38,8 +38,8 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr8 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -64,8 +64,8 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr8 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -90,8 +90,8 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr16 } +# CHECK-NEXT: - { id: 0, class: gr32, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -116,8 +116,8 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64_with_sub_8bit } -# CHECK-NEXT: - { id: 1, class: gr8 } +# CHECK-NEXT: - { id: 0, class: gr64_with_sub_8bit, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -142,8 +142,8 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } -# CHECK-NEXT: - { id: 1, class: gr16 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -168,8 +168,8 @@ alignment: 4 legalized: true regBankSelected: true # CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } -# CHECK-NEXT: - { id: 1, class: gr32 } +# CHECK-NEXT: - { id: 0, class: gr64, preferred-register: '' } +# CHECK-NEXT: - { id: 1, class: gr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll index 874e3e379d8e..5e375cc42e01 100644 --- a/test/CodeGen/X86/O0-pipeline.ll +++ b/test/CodeGen/X86/O0-pipeline.ll @@ -5,12 +5,12 @@ ; CHECK-LABEL: Pass Arguments: ; CHECK-NEXT: Target Library Information ; CHECK-NEXT: Target Pass Configuration +; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Target Transform Information ; CHECK-NEXT: Type-Based Alias Analysis ; CHECK-NEXT: Scoped NoAlias Alias Analysis ; CHECK-NEXT: Assumption Cache Tracker ; CHECK-NEXT: Create Garbage Collector Module Metadata -; CHECK-NEXT: Machine Module Information ; CHECK-NEXT: Machine Branch Probability Analysis ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: Pre-ISel Intrinsic Lowering diff --git a/test/CodeGen/X86/atom-fixup-lea3.ll b/test/CodeGen/X86/atom-fixup-lea3.ll index ed2df277480e..e79d2e69e347 100644 --- a/test/CodeGen/X86/atom-fixup-lea3.ll +++ b/test/CodeGen/X86/atom-fixup-lea3.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s -; CHECK: addl ([[reg:%[a-z]+]]) -; CHECK-NEXT: addl $4, [[reg]] +; CHECK: addl ({{%[a-z]+}},[[reg:%[a-z]+]],4) +; CHECK-NEXT: movl +; CHECK-NEXT: addl 4({{%[a-z]+}},[[reg:%[a-z]+]],4) +; CHECK-NEXT: incl ; Test for the FixupLEAs pre-emit pass. ; An LEA should NOT be substituted for the ADD instruction @@ -20,7 +22,7 @@ ; return sum; ;} -define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %m, i32* nocapture %array2) #0 { +define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %k, i32* nocapture %l, i32* nocapture %m, i32* nocapture %array2) #0 { entry: %cmp7 = icmp sgt i32 %n, 0 br i1 %cmp7, label %for.body.lr.ph, label %for.end @@ -35,6 +37,9 @@ for.body: ; preds = %for.body, %for.body %j.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc1, %for.body ] %inc1 = add nsw i32 %j.09, 1 %arrayidx = getelementptr inbounds i32, i32* %array2, i32 %j.09 + store i32 %0, i32* %m, align 4 + store i32 %sum.010, i32* %m, align 4 + store i32 %0, i32* %m, align 4 %1 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %1 store i32 %add, i32* %m, align 4 diff --git a/test/CodeGen/X86/avx-schedule.ll b/test/CodeGen/X86/avx-schedule.ll index bb05481e313d..47e95fe31bdf 100644 --- a/test/CodeGen/X86/avx-schedule.ll +++ b/test/CodeGen/X86/avx-schedule.ll @@ -910,14 +910,14 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_haddpd: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) %2 = load <4 x double>, <4 x double> *%a2, align 32 @@ -941,14 +941,14 @@ define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; ; BTVER2-LABEL: test_haddps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_haddps: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) %2 = load <8 x float>, <8 x float> *%a2, align 32 @@ -972,14 +972,14 @@ define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # BB#0: -; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_hsubpd: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) %2 = load <4 x double>, <4 x double> *%a2, align 32 @@ -1003,14 +1003,14 @@ define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_hsubps: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] +; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) %2 = load <8 x float>, <8 x float> *%a2, align 32 diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll index 1914b5134bee..91d1f64c6706 100644 --- a/test/CodeGen/X86/avx-splat.ll +++ b/test/CodeGen/X86/avx-splat.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp { ; CHECK-LABEL: funcA: -; CHECK: ## BB#0: ## %entry +; CHECK: # BB#0: # %entry ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5] ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; CHECK-NEXT: retq @@ -14,7 +14,7 @@ entry: define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp { ; CHECK-LABEL: funcB: -; CHECK: ## BB#0: ## %entry +; CHECK: # BB#0: # %entry ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5] ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 @@ -26,7 +26,7 @@ entry: define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp { ; CHECK-LABEL: funcC: -; CHECK: ## BB#0: ## %entry +; CHECK: # BB#0: # %entry ; CHECK-NEXT: vmovq %rdi, %xmm0 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 @@ -41,7 +41,7 @@ entry: define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp { ; CHECK-LABEL: funcD: -; CHECK: ## BB#0: ## %entry +; CHECK: # BB#0: # %entry ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; CHECK-NEXT: retq @@ -58,20 +58,20 @@ entry: ; define <8 x float> @funcE() nounwind { ; CHECK-LABEL: funcE: -; CHECK: ## BB#0: ## %for_exit499 +; CHECK: # BB#0: # %for_exit499 ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: ## implicit-def: %YMM0 +; CHECK-NEXT: # implicit-def: %YMM0 ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne LBB4_2 -; CHECK-NEXT: ## BB#1: ## %load.i1247 +; CHECK-NEXT: jne .LBB4_2 +; CHECK-NEXT: # BB#1: # %load.i1247 ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: andq $-32, %rsp -; CHECK-NEXT: subq $1312, %rsp ## imm = 0x520 +; CHECK-NEXT: subq $1312, %rsp # imm = 0x520 ; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0 ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp -; CHECK-NEXT: LBB4_2: ## %__load_and_broadcast_32.exit1249 +; CHECK-NEXT: .LBB4_2: # %__load_and_broadcast_32.exit1249 ; CHECK-NEXT: retq allocas: %udx495 = alloca [18 x [18 x float]], align 32 @@ -99,7 +99,7 @@ __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_ex define <8 x float> @funcF(i32 %val) nounwind { ; CHECK-LABEL: funcF: -; CHECK: ## BB#0: +; CHECK: # BB#0: ; CHECK-NEXT: vmovd %edi, %xmm0 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0] ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 @@ -112,7 +112,7 @@ define <8 x float> @funcF(i32 %val) nounwind { define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp { ; CHECK-LABEL: funcG: -; CHECK: ## BB#0: ## %entry +; CHECK: # BB#0: # %entry ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; CHECK-NEXT: retq @@ -123,7 +123,7 @@ entry: define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp { ; CHECK-LABEL: funcH: -; CHECK: ## BB#0: ## %entry +; CHECK: # BB#0: # %entry ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5] ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] ; CHECK-NEXT: retq @@ -134,7 +134,7 @@ entry: define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) { ; CHECK-LABEL: splat_load_2f64_11: -; CHECK: ## BB#0: +; CHECK: # BB#0: ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] ; CHECK-NEXT: retq %x = load <2 x double>, <2 x double>* %ptr @@ -144,7 +144,7 @@ define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) { define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) { ; CHECK-LABEL: splat_load_4f64_2222: -; CHECK: ## BB#0: +; CHECK: # BB#0: ; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0 ; CHECK-NEXT: retq %x = load <4 x double>, <4 x double>* %ptr @@ -154,7 +154,7 @@ define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) { define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) { ; CHECK-LABEL: splat_load_4f32_0000: -; CHECK: ## BB#0: +; CHECK: # BB#0: ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 ; CHECK-NEXT: retq %x = load <4 x float>, <4 x float>* %ptr @@ -164,7 +164,7 @@ define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) { define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) { ; CHECK-LABEL: splat_load_8f32_77777777: -; CHECK: ## BB#0: +; CHECK: # BB#0: ; CHECK-NEXT: vbroadcastss 28(%rdi), %ymm0 ; CHECK-NEXT: retq %x = load <8 x float>, <8 x float>* %ptr diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index 8f6afa8785d0..140299f5495d 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -1549,8 +1549,6 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) { ; NOVL: # BB#0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NOVL-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NOVL-NEXT: vpextrb $8, %xmm0, %eax ; NOVL-NEXT: andl $1, %eax @@ -1579,8 +1577,6 @@ define <2 x double> @uitofp_2i1_double(<2 x i32> %a) { ; NOVL: # BB#0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NOVL-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NOVL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; NOVL-NEXT: retq diff --git a/test/CodeGen/X86/build-vector-128.ll b/test/CodeGen/X86/build-vector-128.ll index 8c3a6790ffa6..c73d7654045e 100644 --- a/test/CodeGen/X86/build-vector-128.ll +++ b/test/CodeGen/X86/build-vector-128.ll @@ -41,9 +41,9 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa ; ; SSE2-64-LABEL: test_buildvector_v4f32: ; SSE2-64: # BB#0: -; SSE2-64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-64-NEXT: retq ; ; SSE41-64-LABEL: test_buildvector_v4f32: @@ -74,13 +74,9 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa define <2 x i64> @test_buildvector_v2i64(i64 %a0, i64 %a1) { ; SSE2-32-LABEL: test_buildvector_v2i64: ; SSE2-32: # BB#0: -; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; SSE2-32-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-32-NEXT: retl ; ; SSE-64-LABEL: test_buildvector_v2i64: @@ -126,12 +122,12 @@ define <4 x i32> @test_buildvector_v4i32(i32 %f0, i32 %f1, i32 %f2, i32 %f3) { ; SSE2-64-LABEL: test_buildvector_v4i32: ; SSE2-64: # BB#0: ; SSE2-64-NEXT: movd %ecx, %xmm0 -; SSE2-64-NEXT: movd %esi, %xmm1 +; SSE2-64-NEXT: movd %edx, %xmm1 ; SSE2-64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-64-NEXT: movd %edx, %xmm2 +; SSE2-64-NEXT: movd %esi, %xmm2 ; SSE2-64-NEXT: movd %edi, %xmm0 ; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-64-NEXT: retq ; ; SSE41-64-LABEL: test_buildvector_v4i32: @@ -170,34 +166,34 @@ define <8 x i16> @test_buildvector_v8i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; SSE2-32-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-32-NEXT: retl ; ; SSE2-64-LABEL: test_buildvector_v8i16: ; SSE2-64: # BB#0: -; SSE2-64-NEXT: movd %ecx, %xmm0 +; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-64-NEXT: movd %r9d, %xmm1 -; SSE2-64-NEXT: movd %esi, %xmm2 -; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-64-NEXT: movd %r9d, %xmm0 +; SSE2-64-NEXT: movd %r8d, %xmm2 ; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-64-NEXT: movd %ecx, %xmm0 ; SSE2-64-NEXT: movd %edx, %xmm1 -; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-64-NEXT: movd %r8d, %xmm3 +; SSE2-64-NEXT: movd %esi, %xmm3 ; SSE2-64-NEXT: movd %edi, %xmm0 ; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-64-NEXT: retq ; ; SSE41-32-LABEL: test_buildvector_v8i16: @@ -267,31 +263,31 @@ define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE2-32-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE2-32-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero ; SSE2-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] +; SSE2-32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; SSE2-32-NEXT: retl ; ; SSE2-64-LABEL: test_buildvector_v16i8: @@ -299,34 +295,34 @@ define <16 x i8> @test_buildvector_v16i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, ; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-64-NEXT: movd %ecx, %xmm0 -; SSE2-64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-64-NEXT: movd %r9d, %xmm1 +; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE2-64-NEXT: movd %esi, %xmm2 -; SSE2-64-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] ; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-64-NEXT: movd %edx, %xmm3 ; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-64-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-64-NEXT: movd %r8d, %xmm1 +; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE2-64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] ; SSE2-64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-64-NEXT: movd %r9d, %xmm0 +; SSE2-64-NEXT: movd %r8d, %xmm2 +; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-64-NEXT: movd %ecx, %xmm0 +; SSE2-64-NEXT: movd %edx, %xmm1 ; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-64-NEXT: movd %esi, %xmm4 ; SSE2-64-NEXT: movd %edi, %xmm0 -; SSE2-64-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero ; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; SSE2-64-NEXT: retq ; ; SSE41-32-LABEL: test_buildvector_v16i8: diff --git a/test/CodeGen/X86/buildvec-insertvec.ll b/test/CodeGen/X86/buildvec-insertvec.ll index 730376acdc93..cd5abc1373b9 100644 --- a/test/CodeGen/X86/buildvec-insertvec.ll +++ b/test/CodeGen/X86/buildvec-insertvec.ll @@ -75,9 +75,9 @@ entry: define <4 x float> @test_buildvector_v4f32_register(float %f0, float %f1, float %f2, float %f3) { ; SSE2-LABEL: test_buildvector_v4f32_register: ; SSE2: # BB#0: -; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4f32_register: @@ -102,7 +102,7 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* % ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4f32_load: @@ -126,10 +126,10 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* % define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, float %f2, float* %p3) { ; SSE2-LABEL: test_buildvector_v4f32_partial_load: ; SSE2: # BB#0: -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4f32_partial_load: @@ -150,12 +150,12 @@ define <4 x i32> @test_buildvector_v4i32_register(i32 %a0, i32 %a1, i32 %a2, i32 ; SSE2-LABEL: test_buildvector_v4i32_register: ; SSE2: # BB#0: ; SSE2-NEXT: movd %ecx, %xmm0 -; SSE2-NEXT: movd %esi, %xmm1 +; SSE2-NEXT: movd %edx, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: movd %edx, %xmm2 +; SSE2-NEXT: movd %esi, %xmm2 ; SSE2-NEXT: movd %edi, %xmm0 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4i32_register: @@ -178,7 +178,7 @@ define <4 x i32> @test_buildvector_v4i32_partial(i32 %a0, i32 %a3) { ; SSE2-NEXT: movd %edi, %xmm0 ; SSE2-NEXT: movd %esi, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4i32_partial: @@ -228,21 +228,21 @@ define <4 x i32> @test_buildvector_v4i32_register_zero_2(i32 %a1, i32 %a2, i32 % define <8 x i16> @test_buildvector_v8i16_register(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) { ; SSE2-LABEL: test_buildvector_v8i16_register: ; SSE2: # BB#0: -; SSE2-NEXT: movd %ecx, %xmm0 +; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-NEXT: movd %r9d, %xmm1 -; SSE2-NEXT: movd %esi, %xmm2 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: movd %r9d, %xmm0 +; SSE2-NEXT: movd %r8d, %xmm2 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movd %edx, %xmm1 -; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: movd %r8d, %xmm3 +; SSE2-NEXT: movd %esi, %xmm3 ; SSE2-NEXT: movd %edi, %xmm0 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v8i16_register: @@ -333,34 +333,34 @@ define <16 x i8> @test_buildvector_v16i8_register(i8 %a0, i8 %a1, i8 %a2, i8 %a3 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: movd %ecx, %xmm0 -; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: movd %r9d, %xmm1 +; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE2-NEXT: movd %esi, %xmm2 -; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: movd %edx, %xmm3 ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-NEXT: movd %r8d, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: movd %r9d, %xmm0 +; SSE2-NEXT: movd %r8d, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: movd %ecx, %xmm0 +; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: movd %esi, %xmm4 ; SSE2-NEXT: movd %edi, %xmm0 -; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v16i8_register: diff --git a/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/test/CodeGen/X86/clear_upper_vector_element_bits.ll index 1218b68b1be4..f6d816ec8919 100644 --- a/test/CodeGen/X86/clear_upper_vector_element_bits.ll +++ b/test/CodeGen/X86/clear_upper_vector_element_bits.ll @@ -159,28 +159,7 @@ define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind { define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind { ; SSE-LABEL: _clearupper8xi16a: ; SSE: # BB#0: -; SSE-NEXT: pextrw $1, %xmm0, %eax -; SSE-NEXT: pextrw $2, %xmm0, %r9d -; SSE-NEXT: pextrw $3, %xmm0, %edx -; SSE-NEXT: pextrw $4, %xmm0, %r8d -; SSE-NEXT: pextrw $5, %xmm0, %edi -; SSE-NEXT: pextrw $6, %xmm0, %esi -; SSE-NEXT: pextrw $7, %xmm0, %ecx -; SSE-NEXT: movd %ecx, %xmm1 -; SSE-NEXT: movd %edx, %xmm2 -; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSE-NEXT: movd %edi, %xmm1 -; SSE-NEXT: movd %eax, %xmm3 -; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE-NEXT: movd %esi, %xmm1 -; SSE-NEXT: movd %r9d, %xmm2 -; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSE-NEXT: movd %r8d, %xmm1 -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE-NEXT: andps {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: _clearupper8xi16a: @@ -225,61 +204,9 @@ define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind { define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind { ; SSE-LABEL: _clearupper16xi16a: ; SSE: # BB#0: -; SSE-NEXT: pushq %rbp -; SSE-NEXT: pushq %r15 -; SSE-NEXT: pushq %r14 -; SSE-NEXT: pushq %r12 -; SSE-NEXT: pushq %rbx -; SSE-NEXT: pextrw $1, %xmm0, %edi -; SSE-NEXT: pextrw $2, %xmm0, %eax -; SSE-NEXT: pextrw $3, %xmm0, %ecx -; SSE-NEXT: pextrw $4, %xmm0, %edx -; SSE-NEXT: pextrw $5, %xmm0, %esi -; SSE-NEXT: pextrw $6, %xmm0, %ebx -; SSE-NEXT: pextrw $7, %xmm0, %ebp -; SSE-NEXT: pextrw $1, %xmm1, %r10d -; SSE-NEXT: pextrw $2, %xmm1, %r9d -; SSE-NEXT: pextrw $3, %xmm1, %r14d -; SSE-NEXT: pextrw $4, %xmm1, %r8d -; SSE-NEXT: pextrw $5, %xmm1, %r15d -; SSE-NEXT: pextrw $6, %xmm1, %r11d -; SSE-NEXT: pextrw $7, %xmm1, %r12d -; SSE-NEXT: movd %ebp, %xmm2 -; SSE-NEXT: movd %ecx, %xmm3 -; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE-NEXT: movd %esi, %xmm2 -; SSE-NEXT: movd %edi, %xmm4 -; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE-NEXT: movd %ebx, %xmm2 -; SSE-NEXT: movd %eax, %xmm3 -; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE-NEXT: movd %edx, %xmm2 -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] -; SSE-NEXT: pand %xmm2, %xmm0 -; SSE-NEXT: movd %r12d, %xmm3 -; SSE-NEXT: movd %r14d, %xmm4 -; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE-NEXT: movd %r15d, %xmm3 -; SSE-NEXT: movd %r10d, %xmm5 -; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] -; SSE-NEXT: movd %r11d, %xmm3 -; SSE-NEXT: movd %r9d, %xmm4 -; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; SSE-NEXT: movd %r8d, %xmm3 -; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] -; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3] -; SSE-NEXT: pand %xmm2, %xmm1 -; SSE-NEXT: popq %rbx -; SSE-NEXT: popq %r12 -; SSE-NEXT: popq %r14 -; SSE-NEXT: popq %r15 -; SSE-NEXT: popq %rbp +; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE-NEXT: andps %xmm2, %xmm0 +; SSE-NEXT: andps %xmm2, %xmm1 ; SSE-NEXT: retq ; ; AVX-LABEL: _clearupper16xi16a: @@ -364,10 +291,9 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind { ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm2 +; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -375,31 +301,32 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind { ; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm1 +; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm2 +; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm3 -; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero +; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm2 +; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm4 -; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; @@ -486,10 +413,9 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind { ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm2 +; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -497,31 +423,32 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind { ; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm1 +; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm2 +; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm3 -; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero +; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm2 +; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm4 -; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE-NEXT: pand %xmm2, %xmm0 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -531,10 +458,9 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind { ; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm1 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm4 +; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm1 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -542,31 +468,32 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind { ; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm1 -; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm3 +; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] -; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero -; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] -; SSE-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero -; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3],xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm1 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm4 +; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE-NEXT: movd %eax, %xmm5 -; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] +; SSE-NEXT: movd %eax, %xmm1 +; SSE-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero +; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] +; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: movd %eax, %xmm1 ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm4 +; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] ; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; SSE-NEXT: movd %eax, %xmm6 -; SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] +; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] -; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; SSE-NEXT: pand %xmm2, %xmm1 ; SSE-NEXT: retq ; diff --git a/test/CodeGen/X86/fast-isel-nontemporal.ll b/test/CodeGen/X86/fast-isel-nontemporal.ll index 4140721bd5f3..33d001cdc216 100644 --- a/test/CodeGen/X86/fast-isel-nontemporal.ll +++ b/test/CodeGen/X86/fast-isel-nontemporal.ll @@ -545,7 +545,11 @@ define <8 x float> @test_load_nt8xfloat(<8 x float>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt8xfloat: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xfloat: @@ -583,7 +587,11 @@ define <4 x double> @test_load_nt4xdouble(<4 x double>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt4xdouble: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovapd (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt4xdouble: @@ -621,7 +629,11 @@ define <32 x i8> @test_load_nt32xi8(<32 x i8>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt32xi8: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovdqa (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt32xi8: @@ -659,7 +671,11 @@ define <16 x i16> @test_load_nt16xi16(<16 x i16>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt16xi16: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovdqa (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xi16: @@ -697,7 +713,11 @@ define <8 x i32> @test_load_nt8xi32(<8 x i32>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt8xi32: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovdqa (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xi32: @@ -735,7 +755,11 @@ define <4 x i64> @test_load_nt4xi64(<4 x i64>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt4xi64: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovdqa (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt4xi64: @@ -957,8 +981,16 @@ define <16 x float> @test_load_nt16xfloat(<16 x float>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt16xfloat: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xfloat: @@ -1003,8 +1035,16 @@ define <8 x double> @test_load_nt8xdouble(<8 x double>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt8xdouble: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovapd (%rdi), %ymm0 -; AVX1-NEXT: vmovapd 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xdouble: @@ -1049,8 +1089,16 @@ define <64 x i8> @test_load_nt64xi8(<64 x i8>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt64xi8: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt64xi8: @@ -1101,8 +1149,16 @@ define <32 x i16> @test_load_nt32xi16(<32 x i16>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt32xi16: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt32xi16: @@ -1153,8 +1209,16 @@ define <16 x i32> @test_load_nt16xi32(<16 x i32>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt16xi32: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xi32: @@ -1199,8 +1263,16 @@ define <8 x i64> @test_load_nt8xi64(<8 x i64>* nocapture %ptr) { ; ; AVX1-LABEL: test_load_nt8xi64: ; AVX1: # BB#0: # %entry -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: # implicit-def: %YMM1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xi64: diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll index 85b2b41fa191..068480873c23 100644 --- a/test/CodeGen/X86/full-lsr.ll +++ b/test/CodeGen/X86/full-lsr.ll @@ -1,16 +1,10 @@ ; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s -; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s +; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck %s define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind { -; ATOM: foo -; ATOM: addl -; ATOM: addl -; ATOM: leal ; CHECK: foo -; CHECK: addl -; CHECK: addl -; CHECK: addl +; CHECK: incl entry: %0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1] diff --git a/test/CodeGen/X86/haddsub-2.ll b/test/CodeGen/X86/haddsub-2.ll index 4596b83f7bc2..fd023d018031 100644 --- a/test/CodeGen/X86/haddsub-2.ll +++ b/test/CodeGen/X86/haddsub-2.ll @@ -142,12 +142,12 @@ define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) { ; SSE3-NEXT: movd %xmm0, %edi ; SSE3-NEXT: addl %eax, %edi ; SSE3-NEXT: movd %edi, %xmm0 -; SSE3-NEXT: movd %edx, %xmm1 +; SSE3-NEXT: movd %esi, %xmm1 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: movd %esi, %xmm2 +; SSE3-NEXT: movd %edx, %xmm2 ; SSE3-NEXT: movd %ecx, %xmm0 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: phadd_d_test1: @@ -196,16 +196,16 @@ define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) { ; SSE3-NEXT: movd %xmm0, %esi ; SSE3-NEXT: addl %eax, %esi ; SSE3-NEXT: movd %esi, %xmm0 +; SSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,2,3] +; SSE3-NEXT: movd %xmm2, %eax +; SSE3-NEXT: movd %xmm1, %esi +; SSE3-NEXT: addl %eax, %esi +; SSE3-NEXT: movd %esi, %xmm1 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: movd %ecx, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE3-NEXT: movd %xmm0, %eax -; SSE3-NEXT: movd %xmm1, %ecx -; SSE3-NEXT: addl %eax, %ecx -; SSE3-NEXT: movd %ecx, %xmm1 ; SSE3-NEXT: movd %edx, %xmm0 -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: phadd_d_test2: @@ -258,12 +258,12 @@ define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) { ; SSE3-NEXT: movd %xmm0, %edi ; SSE3-NEXT: subl %edi, %esi ; SSE3-NEXT: movd %esi, %xmm0 -; SSE3-NEXT: movd %ecx, %xmm1 +; SSE3-NEXT: movd %edx, %xmm1 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: movd %edx, %xmm2 +; SSE3-NEXT: movd %ecx, %xmm2 ; SSE3-NEXT: movd %eax, %xmm0 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: phsub_d_test1: @@ -312,16 +312,16 @@ define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) { ; SSE3-NEXT: movd %xmm0, %esi ; SSE3-NEXT: subl %esi, %edx ; SSE3-NEXT: movd %edx, %xmm0 +; SSE3-NEXT: movd %xmm1, %edx +; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3] +; SSE3-NEXT: movd %xmm1, %esi +; SSE3-NEXT: subl %esi, %edx +; SSE3-NEXT: movd %edx, %xmm1 +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE3-NEXT: movd %eax, %xmm2 -; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE3-NEXT: movd %xmm1, %eax -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE3-NEXT: movd %xmm0, %edx -; SSE3-NEXT: subl %edx, %eax -; SSE3-NEXT: movd %eax, %xmm1 ; SSE3-NEXT: movd %ecx, %xmm0 -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: phsub_d_test2: @@ -518,19 +518,19 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) { ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE3-NEXT: movd %xmm0, %r9d ; SSE3-NEXT: addl %edx, %r9d -; SSE3-NEXT: movd %xmm1, %esi +; SSE3-NEXT: movd %xmm1, %edx ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE3-NEXT: movd %xmm0, %r10d -; SSE3-NEXT: addl %esi, %r10d -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] ; SSE3-NEXT: movd %xmm0, %esi +; SSE3-NEXT: addl %edx, %esi +; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; SSE3-NEXT: movd %xmm0, %edx ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3] ; SSE3-NEXT: movd %xmm0, %edi -; SSE3-NEXT: addl %esi, %edi +; SSE3-NEXT: addl %edx, %edi ; SSE3-NEXT: movd %xmm2, %eax ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] -; SSE3-NEXT: movd %xmm0, %r11d -; SSE3-NEXT: addl %eax, %r11d +; SSE3-NEXT: movd %xmm0, %r10d +; SSE3-NEXT: addl %eax, %r10d ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] ; SSE3-NEXT: movd %xmm0, %eax ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3] @@ -541,24 +541,24 @@ define <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) { ; SSE3-NEXT: movd %xmm0, %edx ; SSE3-NEXT: addl %eax, %edx ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1] -; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: movd %xmm0, %r11d ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3] -; SSE3-NEXT: movd %xmm0, %esi -; SSE3-NEXT: addl %eax, %esi +; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: addl %r11d, %eax ; SSE3-NEXT: movd %edi, %xmm0 -; SSE3-NEXT: movd %r9d, %xmm1 +; SSE3-NEXT: movd %esi, %xmm1 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: movd %r10d, %xmm2 +; SSE3-NEXT: movd %r9d, %xmm2 ; SSE3-NEXT: movd %r8d, %xmm0 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE3-NEXT: movd %esi, %xmm1 -; SSE3-NEXT: movd %ecx, %xmm2 +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE3-NEXT: movd %eax, %xmm1 +; SSE3-NEXT: movd %edx, %xmm2 ; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE3-NEXT: movd %edx, %xmm3 -; SSE3-NEXT: movd %r11d, %xmm1 +; SSE3-NEXT: movd %ecx, %xmm3 +; SSE3-NEXT: movd %r10d, %xmm1 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: avx2_vphadd_d_test: @@ -658,83 +658,83 @@ define <16 x i16> @avx2_vphadd_w_test(<16 x i16> %a, <16 x i16> %b) { ; SSE3-NEXT: addl %eax, %ecx ; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill ; SSE3-NEXT: pextrw $2, %xmm0, %eax -; SSE3-NEXT: pextrw $3, %xmm0, %r11d -; SSE3-NEXT: addl %eax, %r11d +; SSE3-NEXT: pextrw $3, %xmm0, %ecx +; SSE3-NEXT: addl %eax, %ecx +; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill ; SSE3-NEXT: pextrw $4, %xmm0, %eax -; SSE3-NEXT: pextrw $5, %xmm0, %r10d -; SSE3-NEXT: addl %eax, %r10d +; SSE3-NEXT: pextrw $5, %xmm0, %r11d +; SSE3-NEXT: addl %eax, %r11d ; SSE3-NEXT: pextrw $6, %xmm0, %eax -; SSE3-NEXT: pextrw $7, %xmm0, %r13d -; SSE3-NEXT: addl %eax, %r13d +; SSE3-NEXT: pextrw $7, %xmm0, %r15d +; SSE3-NEXT: addl %eax, %r15d ; SSE3-NEXT: movd %xmm1, %eax -; SSE3-NEXT: pextrw $1, %xmm1, %r14d -; SSE3-NEXT: addl %eax, %r14d +; SSE3-NEXT: pextrw $1, %xmm1, %r13d +; SSE3-NEXT: addl %eax, %r13d ; SSE3-NEXT: pextrw $2, %xmm1, %eax -; SSE3-NEXT: pextrw $3, %xmm1, %ebp -; SSE3-NEXT: addl %eax, %ebp -; SSE3-NEXT: pextrw $4, %xmm1, %eax -; SSE3-NEXT: pextrw $5, %xmm1, %ebx +; SSE3-NEXT: pextrw $3, %xmm1, %ebx ; SSE3-NEXT: addl %eax, %ebx +; SSE3-NEXT: pextrw $4, %xmm1, %eax +; SSE3-NEXT: pextrw $5, %xmm1, %r8d +; SSE3-NEXT: addl %eax, %r8d ; SSE3-NEXT: pextrw $6, %xmm1, %eax -; SSE3-NEXT: pextrw $7, %xmm1, %edx -; SSE3-NEXT: addl %eax, %edx +; SSE3-NEXT: pextrw $7, %xmm1, %esi +; SSE3-NEXT: addl %eax, %esi ; SSE3-NEXT: movd %xmm2, %eax -; SSE3-NEXT: pextrw $1, %xmm2, %ecx -; SSE3-NEXT: addl %eax, %ecx -; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill +; SSE3-NEXT: pextrw $1, %xmm2, %r10d +; SSE3-NEXT: addl %eax, %r10d ; SSE3-NEXT: pextrw $2, %xmm2, %eax -; SSE3-NEXT: pextrw $3, %xmm2, %r12d -; SSE3-NEXT: addl %eax, %r12d +; SSE3-NEXT: pextrw $3, %xmm2, %r14d +; SSE3-NEXT: addl %eax, %r14d ; SSE3-NEXT: pextrw $4, %xmm2, %eax -; SSE3-NEXT: pextrw $5, %xmm2, %r15d -; SSE3-NEXT: addl %eax, %r15d +; SSE3-NEXT: pextrw $5, %xmm2, %r12d +; SSE3-NEXT: addl %eax, %r12d ; SSE3-NEXT: pextrw $6, %xmm2, %eax -; SSE3-NEXT: pextrw $7, %xmm2, %r8d -; SSE3-NEXT: addl %eax, %r8d -; SSE3-NEXT: movd %xmm3, %eax -; SSE3-NEXT: pextrw $1, %xmm3, %r9d +; SSE3-NEXT: pextrw $7, %xmm2, %r9d ; SSE3-NEXT: addl %eax, %r9d -; SSE3-NEXT: pextrw $2, %xmm3, %eax -; SSE3-NEXT: pextrw $3, %xmm3, %esi -; SSE3-NEXT: addl %eax, %esi -; SSE3-NEXT: pextrw $4, %xmm3, %eax -; SSE3-NEXT: pextrw $5, %xmm3, %edi -; SSE3-NEXT: addl %eax, %edi -; SSE3-NEXT: pextrw $6, %xmm3, %ecx +; SSE3-NEXT: movd %xmm3, %eax +; SSE3-NEXT: pextrw $1, %xmm3, %ebp +; SSE3-NEXT: addl %eax, %ebp +; SSE3-NEXT: pextrw $2, %xmm3, %edx +; SSE3-NEXT: pextrw $3, %xmm3, %edi +; SSE3-NEXT: addl %edx, %edi +; SSE3-NEXT: pextrw $4, %xmm3, %edx +; SSE3-NEXT: pextrw $5, %xmm3, %ecx +; SSE3-NEXT: addl %edx, %ecx +; SSE3-NEXT: pextrw $6, %xmm3, %edx ; SSE3-NEXT: pextrw $7, %xmm3, %eax -; SSE3-NEXT: addl %ecx, %eax -; SSE3-NEXT: movd %edx, %xmm8 -; SSE3-NEXT: movd %r13d, %xmm3 -; SSE3-NEXT: movd %ebp, %xmm9 -; SSE3-NEXT: movd %r11d, %xmm4 -; SSE3-NEXT: movd %ebx, %xmm10 -; SSE3-NEXT: movd %r10d, %xmm7 -; SSE3-NEXT: movd %r14d, %xmm11 +; SSE3-NEXT: addl %edx, %eax +; SSE3-NEXT: movd %esi, %xmm8 +; SSE3-NEXT: movd %r8d, %xmm3 +; SSE3-NEXT: movd %ebx, %xmm9 +; SSE3-NEXT: movd %r13d, %xmm4 +; SSE3-NEXT: movd %r15d, %xmm10 +; SSE3-NEXT: movd %r11d, %xmm7 +; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm11 # 4-byte Folded Reload +; SSE3-NEXT: # xmm11 = mem[0],zero,zero,zero ; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload ; SSE3-NEXT: # xmm0 = mem[0],zero,zero,zero ; SSE3-NEXT: movd %eax, %xmm12 -; SSE3-NEXT: movd %r8d, %xmm6 -; SSE3-NEXT: movd %esi, %xmm13 -; SSE3-NEXT: movd %r12d, %xmm5 -; SSE3-NEXT: movd %edi, %xmm14 -; SSE3-NEXT: movd %r15d, %xmm2 -; SSE3-NEXT: movd %r9d, %xmm15 -; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload -; SSE3-NEXT: # xmm1 = mem[0],zero,zero,zero +; SSE3-NEXT: movd %ecx, %xmm6 +; SSE3-NEXT: movd %edi, %xmm13 +; SSE3-NEXT: movd %ebp, %xmm5 +; SSE3-NEXT: movd %r9d, %xmm14 +; SSE3-NEXT: movd %r12d, %xmm2 +; SSE3-NEXT: movd %r14d, %xmm15 +; SSE3-NEXT: movd %r10d, %xmm1 ; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0] ; SSE3-NEXT: popq %rbx ; SSE3-NEXT: popq %r12 ; SSE3-NEXT: popq %r13 @@ -858,12 +858,12 @@ define <4 x i32> @not_a_hsub_1(<4 x i32> %A, <4 x i32> %B) { ; SSE-NEXT: movd %xmm0, %edi ; SSE-NEXT: subl %edi, %esi ; SSE-NEXT: movd %esi, %xmm0 -; SSE-NEXT: movd %ecx, %xmm1 +; SSE-NEXT: movd %edx, %xmm1 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE-NEXT: movd %edx, %xmm2 +; SSE-NEXT: movd %ecx, %xmm2 ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: not_a_hsub_1: @@ -919,11 +919,11 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: movaps %xmm1, %xmm4 ; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1] ; SSE-NEXT: subss %xmm4, %xmm3 -; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] -; SSE-NEXT: subss %xmm3, %xmm1 -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] +; SSE-NEXT: subss %xmm4, %xmm1 +; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: not_a_hsub_2: @@ -1162,19 +1162,19 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) { ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE3-NEXT: movd %xmm0, %r9d ; SSE3-NEXT: addl %edx, %r9d -; SSE3-NEXT: movd %xmm2, %esi +; SSE3-NEXT: movd %xmm2, %edx ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] -; SSE3-NEXT: movd %xmm0, %r10d -; SSE3-NEXT: addl %esi, %r10d -; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] ; SSE3-NEXT: movd %xmm0, %esi +; SSE3-NEXT: addl %edx, %esi +; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] +; SSE3-NEXT: movd %xmm0, %edx ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3] ; SSE3-NEXT: movd %xmm0, %edi -; SSE3-NEXT: addl %esi, %edi +; SSE3-NEXT: addl %edx, %edi ; SSE3-NEXT: movd %xmm1, %eax ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE3-NEXT: movd %xmm0, %r11d -; SSE3-NEXT: addl %eax, %r11d +; SSE3-NEXT: movd %xmm0, %r10d +; SSE3-NEXT: addl %eax, %r10d ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] ; SSE3-NEXT: movd %xmm0, %eax ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3] @@ -1185,24 +1185,24 @@ define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) { ; SSE3-NEXT: movd %xmm0, %edx ; SSE3-NEXT: addl %eax, %edx ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1] -; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: movd %xmm0, %r11d ; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3] -; SSE3-NEXT: movd %xmm0, %esi -; SSE3-NEXT: addl %eax, %esi +; SSE3-NEXT: movd %xmm0, %eax +; SSE3-NEXT: addl %r11d, %eax ; SSE3-NEXT: movd %edi, %xmm0 -; SSE3-NEXT: movd %r9d, %xmm1 +; SSE3-NEXT: movd %esi, %xmm1 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE3-NEXT: movd %r10d, %xmm2 +; SSE3-NEXT: movd %r9d, %xmm2 ; SSE3-NEXT: movd %r8d, %xmm0 ; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE3-NEXT: movd %esi, %xmm1 -; SSE3-NEXT: movd %ecx, %xmm2 +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE3-NEXT: movd %eax, %xmm1 +; SSE3-NEXT: movd %edx, %xmm2 ; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE3-NEXT: movd %edx, %xmm3 -; SSE3-NEXT: movd %r11d, %xmm1 +; SSE3-NEXT: movd %ecx, %xmm3 +; SSE3-NEXT: movd %r10d, %xmm1 ; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE3-NEXT: retq ; ; SSSE3-LABEL: avx2_hadd_d: @@ -1293,15 +1293,14 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) { ; SSE3-NEXT: .Lcfi23: ; SSE3-NEXT: .cfi_offset %rbp, -16 ; SSE3-NEXT: movd %xmm0, %eax -; SSE3-NEXT: pextrw $1, %xmm0, %ecx -; SSE3-NEXT: addl %eax, %ecx -; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill +; SSE3-NEXT: pextrw $1, %xmm0, %r10d +; SSE3-NEXT: addl %eax, %r10d ; SSE3-NEXT: pextrw $2, %xmm0, %eax -; SSE3-NEXT: pextrw $3, %xmm0, %r15d -; SSE3-NEXT: addl %eax, %r15d +; SSE3-NEXT: pextrw $3, %xmm0, %r11d +; SSE3-NEXT: addl %eax, %r11d ; SSE3-NEXT: pextrw $4, %xmm0, %eax -; SSE3-NEXT: pextrw $5, %xmm0, %r14d -; SSE3-NEXT: addl %eax, %r14d +; SSE3-NEXT: pextrw $5, %xmm0, %r12d +; SSE3-NEXT: addl %eax, %r12d ; SSE3-NEXT: pextrw $6, %xmm0, %eax ; SSE3-NEXT: pextrw $7, %xmm0, %r13d ; SSE3-NEXT: addl %eax, %r13d @@ -1310,70 +1309,71 @@ define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) { ; SSE3-NEXT: addl %eax, %ecx ; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill ; SSE3-NEXT: pextrw $2, %xmm1, %eax -; SSE3-NEXT: pextrw $3, %xmm1, %r11d -; SSE3-NEXT: addl %eax, %r11d -; SSE3-NEXT: pextrw $4, %xmm1, %eax -; SSE3-NEXT: pextrw $5, %xmm1, %r10d -; SSE3-NEXT: addl %eax, %r10d -; SSE3-NEXT: pextrw $6, %xmm1, %eax -; SSE3-NEXT: pextrw $7, %xmm1, %r12d -; SSE3-NEXT: addl %eax, %r12d -; SSE3-NEXT: movd %xmm2, %eax -; SSE3-NEXT: pextrw $1, %xmm2, %ebx -; SSE3-NEXT: addl %eax, %ebx -; SSE3-NEXT: pextrw $2, %xmm2, %eax -; SSE3-NEXT: pextrw $3, %xmm2, %ecx +; SSE3-NEXT: pextrw $3, %xmm1, %ecx ; SSE3-NEXT: addl %eax, %ecx +; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill +; SSE3-NEXT: pextrw $4, %xmm1, %eax +; SSE3-NEXT: pextrw $5, %xmm1, %r14d +; SSE3-NEXT: addl %eax, %r14d +; SSE3-NEXT: pextrw $6, %xmm1, %esi +; SSE3-NEXT: pextrw $7, %xmm1, %r15d +; SSE3-NEXT: addl %esi, %r15d +; SSE3-NEXT: movd %xmm2, %esi +; SSE3-NEXT: pextrw $1, %xmm2, %ebp +; SSE3-NEXT: addl %esi, %ebp +; SSE3-NEXT: pextrw $2, %xmm2, %esi +; SSE3-NEXT: pextrw $3, %xmm2, %edi +; SSE3-NEXT: addl %esi, %edi ; SSE3-NEXT: pextrw $4, %xmm2, %esi -; SSE3-NEXT: pextrw $5, %xmm2, %r8d -; SSE3-NEXT: addl %esi, %r8d +; SSE3-NEXT: pextrw $5, %xmm2, %eax +; SSE3-NEXT: addl %esi, %eax ; SSE3-NEXT: pextrw $6, %xmm2, %esi -; SSE3-NEXT: pextrw $7, %xmm2, %edx -; SSE3-NEXT: addl %esi, %edx -; SSE3-NEXT: movd %xmm3, %edi +; SSE3-NEXT: pextrw $7, %xmm2, %ecx +; SSE3-NEXT: addl %esi, %ecx +; SSE3-NEXT: movd %xmm3, %ebx ; SSE3-NEXT: pextrw $1, %xmm3, %r9d -; SSE3-NEXT: addl %edi, %r9d -; SSE3-NEXT: pextrw $2, %xmm3, %ebp -; SSE3-NEXT: pextrw $3, %xmm3, %edi -; SSE3-NEXT: addl %ebp, %edi -; SSE3-NEXT: pextrw $4, %xmm3, %eax -; SSE3-NEXT: pextrw $5, %xmm3, %ebp -; SSE3-NEXT: addl %eax, %ebp -; SSE3-NEXT: pextrw $6, %xmm3, %esi -; SSE3-NEXT: pextrw $7, %xmm3, %eax -; SSE3-NEXT: addl %esi, %eax -; SSE3-NEXT: movd %edx, %xmm8 -; SSE3-NEXT: movd %r13d, %xmm3 -; SSE3-NEXT: movd %ecx, %xmm9 -; SSE3-NEXT: movd %r15d, %xmm4 -; SSE3-NEXT: movd %r8d, %xmm10 -; SSE3-NEXT: movd %r14d, %xmm7 -; SSE3-NEXT: movd %ebx, %xmm11 -; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload -; SSE3-NEXT: # xmm0 = mem[0],zero,zero,zero -; SSE3-NEXT: movd %eax, %xmm12 -; SSE3-NEXT: movd %r12d, %xmm6 -; SSE3-NEXT: movd %edi, %xmm13 -; SSE3-NEXT: movd %r11d, %xmm5 -; SSE3-NEXT: movd %ebp, %xmm14 -; SSE3-NEXT: movd %r10d, %xmm2 -; SSE3-NEXT: movd %r9d, %xmm15 +; SSE3-NEXT: addl %ebx, %r9d +; SSE3-NEXT: pextrw $2, %xmm3, %edx +; SSE3-NEXT: pextrw $3, %xmm3, %ebx +; SSE3-NEXT: addl %edx, %ebx +; SSE3-NEXT: pextrw $4, %xmm3, %edx +; SSE3-NEXT: pextrw $5, %xmm3, %esi +; SSE3-NEXT: addl %edx, %esi +; SSE3-NEXT: pextrw $6, %xmm3, %r8d +; SSE3-NEXT: pextrw $7, %xmm3, %edx +; SSE3-NEXT: addl %r8d, %edx +; SSE3-NEXT: movd %ecx, %xmm8 +; SSE3-NEXT: movd %eax, %xmm3 +; SSE3-NEXT: movd %edi, %xmm9 +; SSE3-NEXT: movd %ebp, %xmm4 +; SSE3-NEXT: movd %r13d, %xmm10 +; SSE3-NEXT: movd %r12d, %xmm7 +; SSE3-NEXT: movd %r11d, %xmm11 +; SSE3-NEXT: movd %r10d, %xmm0 +; SSE3-NEXT: movd %edx, %xmm12 +; SSE3-NEXT: movd %esi, %xmm6 +; SSE3-NEXT: movd %ebx, %xmm13 +; SSE3-NEXT: movd %r9d, %xmm5 +; SSE3-NEXT: movd %r15d, %xmm14 +; SSE3-NEXT: movd %r14d, %xmm2 +; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm15 # 4-byte Folded Reload +; SSE3-NEXT: # xmm15 = mem[0],zero,zero,zero ; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload ; SSE3-NEXT: # xmm1 = mem[0],zero,zero,zero ; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3] ; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3] +; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0] ; SSE3-NEXT: popq %rbx ; SSE3-NEXT: popq %r12 ; SSE3-NEXT: popq %r13 diff --git a/test/CodeGen/X86/haddsub-undef.ll b/test/CodeGen/X86/haddsub-undef.ll index 6d79d4de5206..091d1a22dbcd 100644 --- a/test/CodeGen/X86/haddsub-undef.ll +++ b/test/CodeGen/X86/haddsub-undef.ll @@ -171,9 +171,8 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) { ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE-NEXT: addss %xmm2, %xmm0 -; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,1,3] -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test8_undef: diff --git a/test/CodeGen/X86/hoist-spill.ll b/test/CodeGen/X86/hoist-spill.ll index afabf96b12a3..03f558fc3ae2 100644 --- a/test/CodeGen/X86/hoist-spill.ll +++ b/test/CodeGen/X86/hoist-spill.ll @@ -3,10 +3,8 @@ ; Check no spills to the same stack slot after hoisting. ; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp) ; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp) -; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp) ; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp) ; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp) -; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp) target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll index 786534b00d39..56f4161147b4 100644 --- a/test/CodeGen/X86/loop-strength-reduce4.ll +++ b/test/CodeGen/X86/loop-strength-reduce4.ll @@ -4,16 +4,19 @@ ; By starting the IV at -64 instead of 0, a cmp is eliminated, ; as the flags from the add can be used directly. -; STATIC: movl $-64, [[ECX:%e..]] +; STATIC: movl $-64, [[EAX:%e..]] -; STATIC: movl [[EAX:%e..]], _state+76([[ECX]]) -; STATIC: addl $16, [[ECX]] +; STATIC: movl %{{.+}}, _state+76([[EAX]]) +; STATIC: addl $16, [[EAX]] ; STATIC: jne -; In PIC mode the symbol can't be folded, so the change-compare-stride -; trick applies. +; The same for PIC mode. -; PIC: cmpl $64 +; PIC: movl $-64, [[EAX:%e..]] + +; PIC: movl %{{.+}}, 76(%{{.+}},[[EAX]]) +; PIC: addl $16, [[EAX]] +; PIC: jne @state = external global [0 x i32] ; <[0 x i32]*> [#uses=4] @S = external global [0 x i32] ; <[0 x i32]*> [#uses=4] diff --git a/test/CodeGen/X86/madd.ll b/test/CodeGen/X86/madd.ll index af86df510016..7c2bb822c967 100644 --- a/test/CodeGen/X86/madd.ll +++ b/test/CodeGen/X86/madd.ll @@ -9,17 +9,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB0_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE2-NEXT: movdqu (%rdi), %xmm2 -; SSE2-NEXT: movdqu (%rsi), %xmm3 +; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm2 +; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm3 ; SSE2-NEXT: pmaddwd %xmm2, %xmm3 ; SSE2-NEXT: paddd %xmm3, %xmm1 -; SSE2-NEXT: addq $16, %rsi -; SSE2-NEXT: addq $16, %rdi -; SSE2-NEXT: addq $-8, %rax +; SSE2-NEXT: addq $8, %rcx +; SSE2-NEXT: cmpq %rcx, %rax ; SSE2-NEXT: jne .LBB0_1 ; SSE2-NEXT: # BB#2: # %middle.block ; SSE2-NEXT: paddd %xmm0, %xmm1 @@ -34,17 +34,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly ; AVX2: # BB#0: # %entry ; AVX2-NEXT: movl %edx, %eax ; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB0_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 -; AVX2-NEXT: vmovdqu (%rsi), %xmm2 -; AVX2-NEXT: vpmaddwd (%rdi), %xmm2, %xmm2 +; AVX2-NEXT: vmovdqu (%rsi,%rcx,2), %xmm2 +; AVX2-NEXT: vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2 ; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0 -; AVX2-NEXT: addq $16, %rsi -; AVX2-NEXT: addq $16, %rdi -; AVX2-NEXT: addq $-8, %rax +; AVX2-NEXT: addq $8, %rcx +; AVX2-NEXT: cmpq %rcx, %rax ; AVX2-NEXT: jne .LBB0_1 ; AVX2-NEXT: # BB#2: # %middle.block ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 @@ -60,17 +60,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly ; AVX512: # BB#0: # %entry ; AVX512-NEXT: movl %edx, %eax ; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: xorl %ecx, %ecx ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: .p2align 4, 0x90 ; AVX512-NEXT: .LBB0_1: # %vector.body ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1 -; AVX512-NEXT: vmovdqu (%rsi), %xmm2 -; AVX512-NEXT: vpmaddwd (%rdi), %xmm2, %xmm2 +; AVX512-NEXT: vmovdqu (%rsi,%rcx,2), %xmm2 +; AVX512-NEXT: vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2 ; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0 -; AVX512-NEXT: addq $16, %rsi -; AVX512-NEXT: addq $16, %rdi -; AVX512-NEXT: addq $-8, %rax +; AVX512-NEXT: addq $8, %rcx +; AVX512-NEXT: cmpq %rcx, %rax ; AVX512-NEXT: jne .LBB0_1 ; AVX512-NEXT: # BB#2: # %middle.block ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 @@ -118,12 +118,13 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: .p2align 4, 0x90 ; SSE2-NEXT: .LBB1_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE2-NEXT: movdqu (%rdi), %xmm2 -; SSE2-NEXT: movdqu (%rsi), %xmm3 +; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm2 +; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm3 ; SSE2-NEXT: movdqa %xmm3, %xmm4 ; SSE2-NEXT: pmulhuw %xmm2, %xmm4 ; SSE2-NEXT: pmullw %xmm2, %xmm3 @@ -132,9 +133,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly ; SSE2-NEXT: paddd %xmm2, %xmm0 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] ; SSE2-NEXT: paddd %xmm3, %xmm1 -; SSE2-NEXT: addq $16, %rsi -; SSE2-NEXT: addq $16, %rdi -; SSE2-NEXT: addq $-8, %rax +; SSE2-NEXT: addq $8, %rcx +; SSE2-NEXT: cmpq %rcx, %rax ; SSE2-NEXT: jne .LBB1_1 ; SSE2-NEXT: # BB#2: # %middle.block ; SSE2-NEXT: paddd %xmm1, %xmm0 @@ -149,6 +149,7 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly ; AVX2: # BB#0: # %entry ; AVX2-NEXT: movl %edx, %eax ; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB1_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -156,9 +157,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX2-NEXT: vpmulld %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0 -; AVX2-NEXT: addq $16, %rsi -; AVX2-NEXT: addq $16, %rdi -; AVX2-NEXT: addq $-8, %rax +; AVX2-NEXT: addq $8, %rcx +; AVX2-NEXT: cmpq %rcx, %rax ; AVX2-NEXT: jne .LBB1_1 ; AVX2-NEXT: # BB#2: # %middle.block ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 @@ -174,6 +174,7 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly ; AVX512: # BB#0: # %entry ; AVX512-NEXT: movl %edx, %eax ; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: xorl %ecx, %ecx ; AVX512-NEXT: .p2align 4, 0x90 ; AVX512-NEXT: .LBB1_1: # %vector.body ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1 @@ -181,9 +182,8 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX512-NEXT: vpmulld %ymm1, %ymm2, %ymm1 ; AVX512-NEXT: vpaddd %ymm0, %ymm1, %ymm0 -; AVX512-NEXT: addq $16, %rsi -; AVX512-NEXT: addq $16, %rdi -; AVX512-NEXT: addq $-8, %rax +; AVX512-NEXT: addq $8, %rcx +; AVX512-NEXT: cmpq %rcx, %rax ; AVX512-NEXT: jne .LBB1_1 ; AVX512-NEXT: # BB#2: # %middle.block ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 @@ -231,6 +231,7 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3 ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movl %edx, %eax ; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: pxor %xmm2, %xmm2 @@ -263,9 +264,8 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] ; SSE2-NEXT: psrad $16, %xmm4 ; SSE2-NEXT: paddd %xmm4, %xmm2 -; SSE2-NEXT: addq $16, %rsi -; SSE2-NEXT: addq $16, %rdi -; SSE2-NEXT: addq $-16, %rax +; SSE2-NEXT: addq $16, %rcx +; SSE2-NEXT: cmpq %rcx, %rax ; SSE2-NEXT: jne .LBB2_1 ; SSE2-NEXT: # BB#2: # %middle.block ; SSE2-NEXT: paddd %xmm3, %xmm0 @@ -282,17 +282,17 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3 ; AVX2: # BB#0: # %entry ; AVX2-NEXT: movl %edx, %eax ; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0 +; AVX2-NEXT: xorl %ecx, %ecx ; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX2-NEXT: .p2align 4, 0x90 ; AVX2-NEXT: .LBB2_1: # %vector.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 -; AVX2-NEXT: vpmovsxbw (%rdi), %ymm2 -; AVX2-NEXT: vpmovsxbw (%rsi), %ymm3 +; AVX2-NEXT: vpmovsxbw (%rdi,%rcx), %ymm2 +; AVX2-NEXT: vpmovsxbw (%rsi,%rcx), %ymm3 ; AVX2-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2 ; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1 -; AVX2-NEXT: addq $16, %rsi -; AVX2-NEXT: addq $16, %rdi -; AVX2-NEXT: addq $-16, %rax +; AVX2-NEXT: addq $16, %rcx +; AVX2-NEXT: cmpq %rcx, %rax ; AVX2-NEXT: jne .LBB2_1 ; AVX2-NEXT: # BB#2: # %middle.block ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0 @@ -309,18 +309,18 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3 ; AVX512: # BB#0: # %entry ; AVX512-NEXT: movl %edx, %eax ; AVX512-NEXT: vpxord %zmm0, %zmm0, %zmm0 +; AVX512-NEXT: xorl %ecx, %ecx ; AVX512-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX512-NEXT: .p2align 4, 0x90 ; AVX512-NEXT: .LBB2_1: # %vector.body ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1 -; AVX512-NEXT: vpmovsxbw (%rdi), %ymm2 -; AVX512-NEXT: vpmovsxbw (%rsi), %ymm3 +; AVX512-NEXT: vpmovsxbw (%rdi,%rcx), %ymm2 +; AVX512-NEXT: vpmovsxbw (%rsi,%rcx), %ymm3 ; AVX512-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2 ; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm2 ; AVX512-NEXT: vpaddd %zmm0, %zmm2, %zmm0 -; AVX512-NEXT: addq $16, %rsi -; AVX512-NEXT: addq $16, %rdi -; AVX512-NEXT: addq $-16, %rax +; AVX512-NEXT: addq $16, %rcx +; AVX512-NEXT: cmpq %rcx, %rax ; AVX512-NEXT: jne .LBB2_1 ; AVX512-NEXT: # BB#2: # %middle.block ; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1] diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll index 8c0a4d4f1752..61aa05a5270b 100644 --- a/test/CodeGen/X86/masked-iv-safe.ll +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: count_up ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: incq +; CHECK: addq $8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @count_up(double* %d, i64 %n) nounwind { @@ -38,7 +38,7 @@ return: ; CHECK-LABEL: count_down ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq +; CHECK: addq $-8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @count_down(double* %d, i64 %n) nounwind { @@ -71,7 +71,7 @@ return: ; CHECK-LABEL: count_up_signed ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: incq +; CHECK: addq $8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @count_up_signed(double* %d, i64 %n) nounwind { @@ -106,7 +106,7 @@ return: ; CHECK-LABEL: count_down_signed ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq +; CHECK: addq $-8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @count_down_signed(double* %d, i64 %n) nounwind { @@ -141,7 +141,7 @@ return: ; CHECK-LABEL: another_count_up ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq +; CHECK: addq $8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @another_count_up(double* %d, i64 %n) nounwind { @@ -174,7 +174,7 @@ return: ; CHECK-LABEL: another_count_down ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq $-8, +; CHECK: addq $-8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @another_count_down(double* %d, i64 %n) nounwind { @@ -207,7 +207,7 @@ return: ; CHECK-LABEL: another_count_up_signed ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: addq +; CHECK: addq $8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @another_count_up_signed(double* %d, i64 %n) nounwind { @@ -242,7 +242,7 @@ return: ; CHECK-LABEL: another_count_down_signed ; CHECK-NOT: {{and|movz|sar|shl}} -; CHECK: decq +; CHECK: addq $-8 ; CHECK-NOT: {{and|movz|sar|shl}} ; CHECK: jne define void @another_count_down_signed(double* %d, i64 %n) nounwind { diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll index ce1bb3b06ce5..4e2475b1c67d 100644 --- a/test/CodeGen/X86/memcmp.ll +++ b/test/CodeGen/X86/memcmp.ll @@ -10,9 +10,28 @@ declare i32 @memcmp(i8*, i8*, i64) -define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind { +define i32 @length2(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length2: ; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $2 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length2: +; X64: # BB#0: +; X64-NEXT: movl $2, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind + ret i32 %m +} + +define i1 @length2_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length2_eq: +; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movzwl (%ecx), %ecx @@ -20,7 +39,7 @@ define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length2: +; X64-LABEL: length2_eq: ; X64: # BB#0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: cmpw (%rsi), %ax @@ -31,8 +50,8 @@ define i1 @length2(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length2_const: +define i1 @length2_eq_const(i8* %X) nounwind { +; X32-LABEL: length2_eq_const: ; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movzwl (%eax), %eax @@ -40,7 +59,7 @@ define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind { ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; X64-LABEL: length2_const: +; X64-LABEL: length2_eq_const: ; X64: # BB#0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: cmpl $12849, %eax # imm = 0x3231 @@ -51,8 +70,8 @@ define i1 @length2_const(i8* %X, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind { -; X32-LABEL: length2_nobuiltin_attr: +define i1 @length2_eq_nobuiltin_attr(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length2_eq_nobuiltin_attr: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $2 @@ -64,7 +83,7 @@ define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length2_nobuiltin_attr: +; X64-LABEL: length2_eq_nobuiltin_attr: ; X64: # BB#0: ; X64-NEXT: pushq %rax ; X64-NEXT: movl $2, %edx @@ -78,9 +97,74 @@ define i1 @length2_nobuiltin_attr(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind { +define i32 @length3(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length3: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $3 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length3: +; X64: # BB#0: +; X64-NEXT: movl $3, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind + ret i32 %m +} + +define i1 @length3_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length3_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $3 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: testl %eax, %eax +; X32-NEXT: setne %al +; X32-NEXT: retl +; +; X64-LABEL: length3_eq: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $3, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length4(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length4: ; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $4 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length4: +; X64: # BB#0: +; X64-NEXT: movl $4, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind + ret i32 %m +} + +define i1 @length4_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length4_eq: +; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%ecx), %ecx @@ -88,7 +172,7 @@ define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; X64-LABEL: length4: +; X64-LABEL: length4_eq: ; X64: # BB#0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: cmpl (%rsi), %eax @@ -99,15 +183,15 @@ define i1 @length4(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length4_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length4_const: +define i1 @length4_eq_const(i8* %X) nounwind { +; X32-LABEL: length4_eq_const: ; X32: # BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: cmpl $875770417, (%eax) # imm = 0x34333231 ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length4_const: +; X64-LABEL: length4_eq_const: ; X64: # BB#0: ; X64-NEXT: cmpl $875770417, (%rdi) # imm = 0x34333231 ; X64-NEXT: sete %al @@ -117,7 +201,53 @@ define i1 @length4_const(i8* %X, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind { +define i32 @length5(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length5: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $5 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length5: +; X64: # BB#0: +; X64-NEXT: movl $5, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind + ret i32 %m +} + +define i1 @length5_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length5_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $5 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: testl %eax, %eax +; X32-NEXT: setne %al +; X32-NEXT: retl +; +; X64-LABEL: length5_eq: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $5, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length8(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length8: ; X32: # BB#0: ; X32-NEXT: pushl $0 @@ -126,11 +256,30 @@ define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: calll memcmp ; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length8: +; X64: # BB#0: +; X64-NEXT: movl $8, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind + ret i32 %m +} + +define i1 @length8_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length8_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $8 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp ; X32-NEXT: testl %eax, %eax ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length8: +; X64-LABEL: length8_eq: ; X64: # BB#0: ; X64-NEXT: movq (%rdi), %rax ; X64-NEXT: cmpq (%rsi), %rax @@ -141,8 +290,8 @@ define i1 @length8(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length8_const: +define i1 @length8_eq_const(i8* %X) nounwind { +; X32-LABEL: length8_eq_const: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $8 @@ -154,7 +303,7 @@ define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind { ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; X64-LABEL: length8_const: +; X64-LABEL: length8_eq_const: ; X64: # BB#0: ; X64-NEXT: movabsq $3978425819141910832, %rax # imm = 0x3736353433323130 ; X64-NEXT: cmpq %rax, (%rdi) @@ -165,7 +314,55 @@ define i1 @length8_const(i8* %X, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length16(i8* %x, i8* %y) nounwind { +define i1 @length12_eq(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length12_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $12 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: testl %eax, %eax +; X32-NEXT: setne %al +; X32-NEXT: retl +; +; X64-LABEL: length12_eq: +; X64: # BB#0: +; X64-NEXT: pushq %rax +; X64-NEXT: movl $12, %edx +; X64-NEXT: callq memcmp +; X64-NEXT: testl %eax, %eax +; X64-NEXT: setne %al +; X64-NEXT: popq %rcx +; X64-NEXT: retq + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind + %c = icmp ne i32 %m, 0 + ret i1 %c +} + +define i32 @length12(i8* %X, i8* %Y) nounwind { +; X32-LABEL: length12: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $12 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length12: +; X64: # BB#0: +; X64-NEXT: movl $12, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind + ret i32 %m +} + +; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329 + +define i32 @length16(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length16: ; X32: # BB#0: ; X32-NEXT: pushl $0 @@ -174,11 +371,30 @@ define i1 @length16(i8* %x, i8* %y) nounwind { ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: calll memcmp ; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length16: +; X64: # BB#0: +; X64-NEXT: movl $16, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind + ret i32 %m +} + +define i1 @length16_eq(i8* %x, i8* %y) nounwind { +; X32-LABEL: length16_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $16 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp ; X32-NEXT: testl %eax, %eax ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; SSE2-LABEL: length16: +; SSE2-LABEL: length16_eq: ; SSE2: # BB#0: ; SSE2-NEXT: movdqu (%rsi), %xmm0 ; SSE2-NEXT: movdqu (%rdi), %xmm1 @@ -188,7 +404,7 @@ define i1 @length16(i8* %x, i8* %y) nounwind { ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; -; AVX2-LABEL: length16: +; AVX2-LABEL: length16_eq: ; AVX2: # BB#0: ; AVX2-NEXT: vmovdqu (%rdi), %xmm0 ; AVX2-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0 @@ -201,8 +417,8 @@ define i1 @length16(i8* %x, i8* %y) nounwind { ret i1 %cmp } -define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length16_const: +define i1 @length16_eq_const(i8* %X) nounwind { +; X32-LABEL: length16_eq_const: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $16 @@ -214,7 +430,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind { ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; SSE2-LABEL: length16_const: +; SSE2-LABEL: length16_eq_const: ; SSE2: # BB#0: ; SSE2-NEXT: movdqu (%rdi), %xmm0 ; SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0 @@ -223,7 +439,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind { ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; -; AVX2-LABEL: length16_const: +; AVX2-LABEL: length16_eq_const: ; AVX2: # BB#0: ; AVX2-NEXT: vmovdqu (%rdi), %xmm0 ; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0 @@ -236,7 +452,7 @@ define i1 @length16_const(i8* %X, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length32(i8* %x, i8* %y) nounwind { +define i32 @length32(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length32: ; X32: # BB#0: ; X32-NEXT: pushl $0 @@ -245,11 +461,32 @@ define i1 @length32(i8* %x, i8* %y) nounwind { ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: calll memcmp ; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length32: +; X64: # BB#0: +; X64-NEXT: movl $32, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 32) nounwind + ret i32 %m +} + +; PR33325 - https://bugs.llvm.org/show_bug.cgi?id=33325 + +define i1 @length32_eq(i8* %x, i8* %y) nounwind { +; X32-LABEL: length32_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $32 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp ; X32-NEXT: testl %eax, %eax ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; SSE2-LABEL: length32: +; SSE2-LABEL: length32_eq: ; SSE2: # BB#0: ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movl $32, %edx @@ -259,7 +496,7 @@ define i1 @length32(i8* %x, i8* %y) nounwind { ; SSE2-NEXT: popq %rcx ; SSE2-NEXT: retq ; -; AVX2-LABEL: length32: +; AVX2-LABEL: length32_eq: ; AVX2: # BB#0: ; AVX2-NEXT: vmovdqu (%rdi), %ymm0 ; AVX2-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0 @@ -273,8 +510,8 @@ define i1 @length32(i8* %x, i8* %y) nounwind { ret i1 %cmp } -define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length32_const: +define i1 @length32_eq_const(i8* %X) nounwind { +; X32-LABEL: length32_eq_const: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $32 @@ -286,7 +523,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind { ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; SSE2-LABEL: length32_const: +; SSE2-LABEL: length32_eq_const: ; SSE2: # BB#0: ; SSE2-NEXT: pushq %rax ; SSE2-NEXT: movl $.L.str, %esi @@ -297,7 +534,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind { ; SSE2-NEXT: popq %rcx ; SSE2-NEXT: retq ; -; AVX2-LABEL: length32_const: +; AVX2-LABEL: length32_eq_const: ; AVX2: # BB#0: ; AVX2-NEXT: vmovdqu (%rdi), %ymm0 ; AVX2-NEXT: vpcmpeqb {{.*}}(%rip), %ymm0, %ymm0 @@ -311,7 +548,7 @@ define i1 @length32_const(i8* %X, i32* nocapture %P) nounwind { ret i1 %c } -define i1 @length64(i8* %x, i8* %y) nounwind { +define i32 @length64(i8* %X, i8* %Y) nounwind { ; X32-LABEL: length64: ; X32: # BB#0: ; X32-NEXT: pushl $0 @@ -320,11 +557,30 @@ define i1 @length64(i8* %x, i8* %y) nounwind { ; X32-NEXT: pushl {{[0-9]+}}(%esp) ; X32-NEXT: calll memcmp ; X32-NEXT: addl $16, %esp +; X32-NEXT: retl +; +; X64-LABEL: length64: +; X64: # BB#0: +; X64-NEXT: movl $64, %edx +; X64-NEXT: jmp memcmp # TAILCALL + %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 64) nounwind + ret i32 %m +} + +define i1 @length64_eq(i8* %x, i8* %y) nounwind { +; X32-LABEL: length64_eq: +; X32: # BB#0: +; X32-NEXT: pushl $0 +; X32-NEXT: pushl $64 +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: pushl {{[0-9]+}}(%esp) +; X32-NEXT: calll memcmp +; X32-NEXT: addl $16, %esp ; X32-NEXT: testl %eax, %eax ; X32-NEXT: setne %al ; X32-NEXT: retl ; -; X64-LABEL: length64: +; X64-LABEL: length64_eq: ; X64: # BB#0: ; X64-NEXT: pushq %rax ; X64-NEXT: movl $64, %edx @@ -338,8 +594,8 @@ define i1 @length64(i8* %x, i8* %y) nounwind { ret i1 %cmp } -define i1 @length64_const(i8* %X, i32* nocapture %P) nounwind { -; X32-LABEL: length64_const: +define i1 @length64_eq_const(i8* %X) nounwind { +; X32-LABEL: length64_eq_const: ; X32: # BB#0: ; X32-NEXT: pushl $0 ; X32-NEXT: pushl $64 @@ -351,7 +607,7 @@ define i1 @length64_const(i8* %X, i32* nocapture %P) nounwind { ; X32-NEXT: sete %al ; X32-NEXT: retl ; -; X64-LABEL: length64_const: +; X64-LABEL: length64_eq_const: ; X64: # BB#0: ; X64-NEXT: pushq %rax ; X64-NEXT: movl $.L.str, %esi diff --git a/test/CodeGen/X86/merge-consecutive-loads-128.ll b/test/CodeGen/X86/merge-consecutive-loads-128.ll index 71417694b0d4..1d5829407b71 100644 --- a/test/CodeGen/X86/merge-consecutive-loads-128.ll +++ b/test/CodeGen/X86/merge-consecutive-loads-128.ll @@ -269,10 +269,8 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s ; SSE2-LABEL: merge_4f32_f32_012u: ; SSE2: # BB#0: ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: merge_4f32_f32_012u: @@ -290,11 +288,11 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s ; X32-SSE1-LABEL: merge_4f32_f32_012u: ; X32-SSE1: # BB#0: ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-SSE1-NEXT: retl ; ; X32-SSE41-LABEL: merge_4f32_f32_012u: @@ -320,10 +318,8 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s ; SSE2-LABEL: merge_4f32_f32_019u: ; SSE2: # BB#0: ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: merge_4f32_f32_019u: @@ -341,11 +337,11 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s ; X32-SSE1-LABEL: merge_4f32_f32_019u: ; X32-SSE1: # BB#0: ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-SSE1-NEXT: retl ; ; X32-SSE41-LABEL: merge_4f32_f32_019u: @@ -1037,13 +1033,11 @@ define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinlin define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable noinline ssp { ; SSE2-LABEL: merge_4f32_f32_2345_volatile: ; SSE2: # BB#0: -; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: merge_4f32_f32_2345_volatile: @@ -1065,13 +1059,13 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n ; X32-SSE1-LABEL: merge_4f32_f32_2345_volatile: ; X32-SSE1: # BB#0: ; X32-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; X32-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; X32-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X32-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-SSE1-NEXT: retl ; ; X32-SSE41-LABEL: merge_4f32_f32_2345_volatile: diff --git a/test/CodeGen/X86/mul-constant-i16.ll b/test/CodeGen/X86/mul-constant-i16.ll index e3e2737cf3e6..7b39bfe1c484 100644 --- a/test/CodeGen/X86/mul-constant-i16.ll +++ b/test/CodeGen/X86/mul-constant-i16.ll @@ -188,13 +188,16 @@ define i16 @test_mul_by_11(i16 %x) { ; X86-LABEL: test_mul_by_11: ; X86: # BB#0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $11, %eax, %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %eax ; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_11: ; X64: # BB#0: -; X64-NEXT: imull $11, %edi, %eax +; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: leal (%rdi,%rax,2), %eax ; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X64-NEXT: retq %mul = mul nsw i16 %x, 11 @@ -225,13 +228,16 @@ define i16 @test_mul_by_13(i16 %x) { ; X86-LABEL: test_mul_by_13: ; X86: # BB#0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $13, %eax, %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_13: ; X64: # BB#0: -; X64-NEXT: imull $13, %edi, %eax +; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-NEXT: leal (%rdi,%rdi,2), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax ; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X64-NEXT: retq %mul = mul nsw i16 %x, 13 @@ -241,14 +247,19 @@ define i16 @test_mul_by_13(i16 %x) { define i16 @test_mul_by_14(i16 %x) { ; X86-LABEL: test_mul_by_14: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $14, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %eax +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_14: ; X64: # BB#0: -; X64-NEXT: imull $14, %edi, %eax +; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-NEXT: leal (%rdi,%rdi,2), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X64-NEXT: retq %mul = mul nsw i16 %x, 14 @@ -337,14 +348,19 @@ define i16 @test_mul_by_18(i16 %x) { define i16 @test_mul_by_19(i16 %x) { ; X86-LABEL: test_mul_by_19: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $19, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: shll $2, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_19: ; X64: # BB#0: -; X64-NEXT: imull $19, %edi, %eax +; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: shll $2, %eax +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X64-NEXT: retq %mul = mul nsw i16 %x, 19 @@ -375,13 +391,16 @@ define i16 @test_mul_by_21(i16 %x) { ; X86-LABEL: test_mul_by_21: ; X86: # BB#0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $21, %eax, %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_21: ; X64: # BB#0: -; X64-NEXT: imull $21, %edi, %eax +; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax ; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X64-NEXT: retq %mul = mul nsw i16 %x, 21 @@ -391,14 +410,19 @@ define i16 @test_mul_by_21(i16 %x) { define i16 @test_mul_by_22(i16 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $22, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_22: ; X64: # BB#0: -; X64-NEXT: imull $22, %edi, %eax +; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-NEXT: leal (%rdi,%rdi,4), %eax +; X64-NEXT: leal (%rdi,%rax,4), %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X64-NEXT: retq %mul = mul nsw i16 %x, 22 @@ -408,14 +432,19 @@ define i16 @test_mul_by_22(i16 %x) { define i16 @test_mul_by_23(i16 %x) { ; X86-LABEL: test_mul_by_23: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $23, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %eax +; X86-NEXT: shll $3, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_23: ; X64: # BB#0: -; X64-NEXT: imull $23, %edi, %eax +; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-NEXT: leal (%rdi,%rdi,2), %eax +; X64-NEXT: shll $3, %eax +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X64-NEXT: retq %mul = mul nsw i16 %x, 23 @@ -465,14 +494,19 @@ define i16 @test_mul_by_25(i16 %x) { define i16 @test_mul_by_26(i16 %x) { ; X86-LABEL: test_mul_by_26: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $26, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_26: ; X64: # BB#0: -; X64-NEXT: imull $26, %edi, %eax +; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-NEXT: leal (%rdi,%rdi,8), %eax +; X64-NEXT: leal (%rax,%rax,2), %eax +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X64-NEXT: retq %mul = mul nsw i16 %x, 26 @@ -502,14 +536,19 @@ define i16 @test_mul_by_27(i16 %x) { define i16 @test_mul_by_28(i16 %x) { ; X86-LABEL: test_mul_by_28: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $28, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_28: ; X64: # BB#0: -; X64-NEXT: imull $28, %edi, %eax +; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-NEXT: leal (%rdi,%rdi,8), %eax +; X64-NEXT: leal (%rax,%rax,2), %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X64-NEXT: retq %mul = mul nsw i16 %x, 28 @@ -519,14 +558,21 @@ define i16 @test_mul_by_28(i16 %x) { define i16 @test_mul_by_29(i16 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $29, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_29: ; X64: # BB#0: -; X64-NEXT: imull $29, %edi, %eax +; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-NEXT: leal (%rdi,%rdi,8), %eax +; X64-NEXT: leal (%rax,%rax,2), %eax +; X64-NEXT: addl %edi, %eax +; X64-NEXT: addl %edi, %eax ; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X64-NEXT: retq %mul = mul nsw i16 %x, 29 @@ -536,14 +582,20 @@ define i16 @test_mul_by_29(i16 %x) { define i16 @test_mul_by_30(i16 %x) { ; X86-LABEL: test_mul_by_30: ; X86: # BB#0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: imull $30, %eax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shll $5, %eax +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X86-NEXT: retl ; ; X64-LABEL: test_mul_by_30: ; X64: # BB#0: -; X64-NEXT: imull $30, %edi, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shll $5, %eax +; X64-NEXT: subl %edi, %eax +; X64-NEXT: subl %edi, %eax ; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; X64-NEXT: retq %mul = mul nsw i16 %x, 30 @@ -587,3 +639,30 @@ define i16 @test_mul_by_32(i16 %x) { %mul = mul nsw i16 %x, 32 ret i16 %mul } + +; (x*9+42)*(x*5+2) +define i16 @test_mul_spec(i16 %x) nounwind { +; X86-LABEL: test_mul_spec: +; X86: # BB#0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal 42(%eax,%eax,8), %ecx +; X86-NEXT: leal 2(%eax,%eax,4), %eax +; X86-NEXT: imull %ecx, %eax +; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> +; X86-NEXT: retl +; +; X64-LABEL: test_mul_spec: +; X64: # BB#0: +; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-NEXT: leal 42(%rdi,%rdi,8), %ecx +; X64-NEXT: leal 2(%rdi,%rdi,4), %eax +; X64-NEXT: imull %ecx, %eax +; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> +; X64-NEXT: retq + %mul = mul nsw i16 %x, 9 + %add = add nsw i16 %mul, 42 + %mul2 = mul nsw i16 %x, 5 + %add2 = add nsw i16 %mul2, 2 + %mul3 = mul nsw i16 %add, %add2 + ret i16 %mul3 +} diff --git a/test/CodeGen/X86/mul-constant-i32.ll b/test/CodeGen/X86/mul-constant-i32.ll index 76e46e1f1b09..d545b477e102 100644 --- a/test/CodeGen/X86/mul-constant-i32.ll +++ b/test/CodeGen/X86/mul-constant-i32.ll @@ -1,6 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG +; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT define i32 @test_mul_by_1(i32 %x) { ; X86-LABEL: test_mul_by_1: @@ -8,10 +14,40 @@ define i32 @test_mul_by_1(i32 %x) { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_1: -; X64: # BB#0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_1: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_1: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_1: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_1: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_1: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_1: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_1: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 1 ret i32 %mul } @@ -23,11 +59,47 @@ define i32 @test_mul_by_2(i32 %x) { ; X86-NEXT: addl %eax, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_2: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: leal (%rdi,%rdi), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_2: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_2: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_2: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: addl %eax, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_2: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_2: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; JAG-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_2: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_2: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; SLM-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 2 ret i32 %mul } @@ -38,11 +110,46 @@ define i32 @test_mul_by_3(i32 %x) { ; X86-NEXT: imull $3, {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_3: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_3: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_3: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_3: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_3: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_3: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; JAG-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_3: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_3: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; SLM-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 3 ret i32 %mul } @@ -54,11 +161,47 @@ define i32 @test_mul_by_4(i32 %x) { ; X86-NEXT: shll $2, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_4: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: leal (,%rdi,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_4: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_4: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_4: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: shll $2, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_4: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_4: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; JAG-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_4: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_4: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; SLM-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 4 ret i32 %mul } @@ -69,11 +212,46 @@ define i32 @test_mul_by_5(i32 %x) { ; X86-NEXT: imull $5, {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_5: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_5: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_5: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_5: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_5: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_5: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; JAG-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_5: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_5: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; SLM-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 5 ret i32 %mul } @@ -86,12 +264,46 @@ define i32 @test_mul_by_6(i32 %x) { ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_6: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: addl %edi, %edi -; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_6: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_6: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_6: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $6, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_6: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_6: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_6: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_6: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 6 ret i32 %mul } @@ -104,12 +316,46 @@ define i32 @test_mul_by_7(i32 %x) { ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_7: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: leal (,%rdi,8), %eax -; X64-NEXT: subl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_7: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_7: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_7: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_7: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_7: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_7: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_7: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 7 ret i32 %mul } @@ -121,11 +367,47 @@ define i32 @test_mul_by_8(i32 %x) { ; X86-NEXT: shll $3, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_8: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: leal (,%rdi,8), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_8: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_8: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_8: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: shll $3, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_8: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_8: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; JAG-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_8: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_8: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; SLM-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 8 ret i32 %mul } @@ -136,11 +418,46 @@ define i32 @test_mul_by_9(i32 %x) { ; X86-NEXT: imull $9, {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_9: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: leal (%rdi,%rdi,8), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_9: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_9: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_9: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_9: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_9: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; JAG-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_9: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_9: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; SLM-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 9 ret i32 %mul } @@ -153,12 +470,46 @@ define i32 @test_mul_by_10(i32 %x) { ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_10: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: addl %edi, %edi -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_10: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_10: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_10: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_10: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_10: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_10: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_10: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 10 ret i32 %mul } @@ -166,13 +517,49 @@ define i32 @test_mul_by_10(i32 %x) { define i32 @test_mul_by_11(i32 %x) { ; X86-LABEL: test_mul_by_11: ; X86: # BB#0: -; X86-NEXT: imull $11, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_11: -; X64: # BB#0: -; X64-NEXT: imull $11, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_11: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_11: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_11: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_11: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_11: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_11: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $11, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_11: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 11 ret i32 %mul } @@ -185,12 +572,46 @@ define i32 @test_mul_by_12(i32 %x) { ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_12: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: shll $2, %edi -; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_12: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_12: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_12: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_12: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_12: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_12: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_12: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 12 ret i32 %mul } @@ -198,13 +619,49 @@ define i32 @test_mul_by_12(i32 %x) { define i32 @test_mul_by_13(i32 %x) { ; X86-LABEL: test_mul_by_13: ; X86: # BB#0: -; X86-NEXT: imull $13, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_13: -; X64: # BB#0: -; X64-NEXT: imull $13, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_13: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_13: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_13: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_13: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_13: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_13: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $13, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_13: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 13 ret i32 %mul } @@ -212,13 +669,52 @@ define i32 @test_mul_by_13(i32 %x) { define i32 @test_mul_by_14(i32 %x) { ; X86-LABEL: test_mul_by_14: ; X86: # BB#0: -; X86-NEXT: imull $14, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %eax +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_14: -; X64: # BB#0: -; X64-NEXT: imull $14, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_14: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_14: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_14: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_14: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_14: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_14: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $14, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_14: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 14 ret i32 %mul } @@ -231,12 +727,46 @@ define i32 @test_mul_by_15(i32 %x) { ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_15: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: leal (%rax,%rax,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_15: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_15: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_15: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_15: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_15: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_15: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_15: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 15 ret i32 %mul } @@ -248,11 +778,47 @@ define i32 @test_mul_by_16(i32 %x) { ; X86-NEXT: shll $4, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_16: -; X64: # BB#0: -; X64-NEXT: shll $4, %edi -; X64-NEXT: movl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_16: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50] +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_16: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shll $4, %edi # sched: [1:0.50] +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_16: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: shll $4, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_16: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] +; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_16: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] +; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_16: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shll $4, %edi # sched: [1:1.00] +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_16: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00] +; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 16 ret i32 %mul } @@ -266,13 +832,49 @@ define i32 @test_mul_by_17(i32 %x) { ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_17: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: movl %edi, %eax -; X64-NEXT: shll $4, %eax -; X64-NEXT: leal (%rax,%rdi), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_17: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_17: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_17: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_17: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_17: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_17: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rax,%rdi), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_17: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 17 ret i32 %mul } @@ -285,12 +887,46 @@ define i32 @test_mul_by_18(i32 %x) { ; X86-NEXT: leal (%eax,%eax,8), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_18: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: addl %edi, %edi -; X64-NEXT: leal (%rdi,%rdi,8), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_18: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_18: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_18: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_18: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_18: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_18: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50] +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_18: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 18 ret i32 %mul } @@ -298,13 +934,52 @@ define i32 @test_mul_by_18(i32 %x) { define i32 @test_mul_by_19(i32 %x) { ; X86-LABEL: test_mul_by_19: ; X86: # BB#0: -; X86-NEXT: imull $19, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: shll $2, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_19: -; X64: # BB#0: -; X64-NEXT: imull $19, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_19: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: shll $2, %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_19: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: shll $2, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_19: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_19: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_19: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_19: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $19, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_19: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 19 ret i32 %mul } @@ -317,12 +992,46 @@ define i32 @test_mul_by_20(i32 %x) { ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_20: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: shll $2, %edi -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_20: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_20: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_20: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_20: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_20: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_20: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_20: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 20 ret i32 %mul } @@ -330,13 +1039,49 @@ define i32 @test_mul_by_20(i32 %x) { define i32 @test_mul_by_21(i32 %x) { ; X86-LABEL: test_mul_by_21: ; X86: # BB#0: -; X86-NEXT: imull $21, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_21: -; X64: # BB#0: -; X64-NEXT: imull $21, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_21: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_21: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_21: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_21: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_21: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_21: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $21, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_21: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 21 ret i32 %mul } @@ -344,13 +1089,52 @@ define i32 @test_mul_by_21(i32 %x) { define i32 @test_mul_by_22(i32 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # BB#0: -; X86-NEXT: imull $22, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,4), %eax +; X86-NEXT: leal (%ecx,%eax,4), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_22: -; X64: # BB#0: -; X64-NEXT: imull $22, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_22: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_22: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_22: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_22: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_22: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_22: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $22, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_22: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 22 ret i32 %mul } @@ -358,13 +1142,52 @@ define i32 @test_mul_by_22(i32 %x) { define i32 @test_mul_by_23(i32 %x) { ; X86-LABEL: test_mul_by_23: ; X86: # BB#0: -; X86-NEXT: imull $23, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %eax +; X86-NEXT: shll $3, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_23: -; X64: # BB#0: -; X64-NEXT: imull $23, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_23: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_23: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: shll $3, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_23: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_23: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_23: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_23: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $23, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_23: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 23 ret i32 %mul } @@ -377,12 +1200,46 @@ define i32 @test_mul_by_24(i32 %x) { ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_24: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: shll $3, %edi -; X64-NEXT: leal (%rdi,%rdi,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_24: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_24: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: shll $3, %edi # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_24: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_24: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_24: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_24: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: shll $3, %edi # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_24: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 24 ret i32 %mul } @@ -395,12 +1252,46 @@ define i32 @test_mul_by_25(i32 %x) { ; X86-NEXT: leal (%eax,%eax,4), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_25: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: leal (%rdi,%rdi,4), %eax -; X64-NEXT: leal (%rax,%rax,4), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_25: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_25: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_25: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_25: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_25: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_25: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rax,%rax,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_25: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 25 ret i32 %mul } @@ -408,13 +1299,52 @@ define i32 @test_mul_by_25(i32 %x) { define i32 @test_mul_by_26(i32 %x) { ; X86-LABEL: test_mul_by_26: ; X86: # BB#0: -; X86-NEXT: imull $26, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_26: -; X64: # BB#0: -; X64-NEXT: imull $26, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_26: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_26: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_26: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_26: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_26: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_26: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $26, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_26: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 26 ret i32 %mul } @@ -427,12 +1357,46 @@ define i32 @test_mul_by_27(i32 %x) { ; X86-NEXT: leal (%eax,%eax,2), %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_27: -; X64: # BB#0: -; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> -; X64-NEXT: leal (%rdi,%rdi,8), %eax -; X64-NEXT: leal (%rax,%rax,2), %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_27: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_27: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_27: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_27: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_27: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_27: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00] +; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_27: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 27 ret i32 %mul } @@ -440,13 +1404,52 @@ define i32 @test_mul_by_27(i32 %x) { define i32 @test_mul_by_28(i32 %x) { ; X86-LABEL: test_mul_by_28: ; X86: # BB#0: -; X86-NEXT: imull $28, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_28: -; X64: # BB#0: -; X64-NEXT: imull $28, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_28: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_28: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_28: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_28: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_28: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_28: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $28, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_28: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 28 ret i32 %mul } @@ -454,13 +1457,55 @@ define i32 @test_mul_by_28(i32 %x) { define i32 @test_mul_by_29(i32 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # BB#0: -; X86-NEXT: imull $29, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal (%ecx,%ecx,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: addl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_29: -; X64: # BB#0: -; X64-NEXT: imull $29, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_29: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_29: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] +; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_29: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_29: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_29: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_29: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $29, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_29: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 29 ret i32 %mul } @@ -468,13 +1513,53 @@ define i32 @test_mul_by_29(i32 %x) { define i32 @test_mul_by_30(i32 %x) { ; X86-LABEL: test_mul_by_30: ; X86: # BB#0: -; X86-NEXT: imull $30, {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shll $5, %eax +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_30: -; X64: # BB#0: -; X64-NEXT: imull $30, %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_30: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_30: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_30: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_30: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_30: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_30: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imull $30, %edi, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_30: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 30 ret i32 %mul } @@ -488,12 +1573,46 @@ define i32 @test_mul_by_31(i32 %x) { ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_31: -; X64: # BB#0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: shll $5, %eax -; X64-NEXT: subl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_31: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_31: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_31: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_31: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_31: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_31: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00] +; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_31: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 31 ret i32 %mul } @@ -505,11 +1624,124 @@ define i32 @test_mul_by_32(i32 %x) { ; X86-NEXT: shll $5, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_32: -; X64: # BB#0: -; X64-NEXT: shll $5, %edi -; X64-NEXT: movl %edi, %eax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_32: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50] +; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_32: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shll $5, %edi # sched: [1:0.50] +; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_32: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: shll $5, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_32: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] +; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_32: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] +; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_32: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shll $5, %edi # sched: [1:1.00] +; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_32: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00] +; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i32 %x, 32 ret i32 %mul } + +; (x*9+42)*(x*5+2) +define i32 @test_mul_spec(i32 %x) nounwind { +; X86-LABEL: test_mul_spec: +; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal 42(%eax,%eax,8), %ecx +; X86-NEXT: leal 2(%eax,%eax,4), %eax +; X86-NEXT: imull %ecx, %eax +; X86-NEXT: retl +; +; X64-HSW-LABEL: test_mul_spec: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50] +; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25] +; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25] +; X64-HSW-NEXT: imull %ecx, %eax # sched: [4:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_spec: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-JAG-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50] +; X64-JAG-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50] +; X64-JAG-NEXT: imull %ecx, %eax # sched: [3:1.00] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_spec: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: leal 42(%eax,%eax,8), %ecx +; X86-NOOPT-NEXT: leal 2(%eax,%eax,4), %eax +; X86-NOOPT-NEXT: imull %ecx, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_spec: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50] +; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25] +; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] +; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25] +; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_spec: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; JAG-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:0.50] +; JAG-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:0.50] +; JAG-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_spec: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-SLM-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00] +; X64-SLM-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00] +; X64-SLM-NEXT: imull %ecx, %eax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_spec: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; SLM-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00] +; SLM-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00] +; SLM-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] + %mul = mul nsw i32 %x, 9 + %add = add nsw i32 %mul, 42 + %mul2 = mul nsw i32 %x, 5 + %add2 = add nsw i32 %mul2, 2 + %mul3 = mul nsw i32 %add, %add2 + ret i32 %mul3 +} diff --git a/test/CodeGen/X86/mul-constant-i64.ll b/test/CodeGen/X86/mul-constant-i64.ll index 8579179a8231..ea841c761c7b 100644 --- a/test/CodeGen/X86/mul-constant-i64.ll +++ b/test/CodeGen/X86/mul-constant-i64.ll @@ -1,18 +1,55 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG +; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM +; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT -define i64 @test_mul_by_1(i64 %x) { +define i64 @test_mul_by_1(i64 %x) nounwind { ; X86-LABEL: test_mul_by_1: ; X86: # BB#0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_1: -; X64: # BB#0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_1: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_1: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_1: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_1: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_1: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_1: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_1: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 1 ret i64 %mul } @@ -26,10 +63,43 @@ define i64 @test_mul_by_2(i64 %x) { ; X86-NEXT: addl %eax, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_2: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_2: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_2: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_2: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $1, %eax, %edx +; X86-NOOPT-NEXT: addl %eax, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_2: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_2: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_2: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_2: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 2 ret i64 %mul } @@ -43,10 +113,43 @@ define i64 @test_mul_by_3(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_3: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_3: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_3: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_3: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $3, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_3: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_3: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_3: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_3: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 3 ret i64 %mul } @@ -60,10 +163,43 @@ define i64 @test_mul_by_4(i64 %x) { ; X86-NEXT: shll $2, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_4: -; X64: # BB#0: -; X64-NEXT: leaq (,%rdi,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_4: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_4: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_4: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $2, %eax, %edx +; X86-NOOPT-NEXT: shll $2, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_4: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_4: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_4: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_4: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 4 ret i64 %mul } @@ -77,10 +213,43 @@ define i64 @test_mul_by_5(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_5: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_5: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_5: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_5: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $5, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_5: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_5: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_5: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_5: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 5 ret i64 %mul } @@ -95,11 +264,46 @@ define i64 @test_mul_by_6(i64 %x) { ; X86-NEXT: leal (%edx,%ecx,2), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_6: -; X64: # BB#0: -; X64-NEXT: addq %rdi, %rdi -; X64-NEXT: leaq (%rdi,%rdi,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_6: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_6: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_6: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $6, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $6, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_6: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_6: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_6: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_6: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 6 ret i64 %mul } @@ -115,11 +319,46 @@ define i64 @test_mul_by_7(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_7: -; X64: # BB#0: -; X64-NEXT: leaq (,%rdi,8), %rax -; X64-NEXT: subq %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_7: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_7: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_7: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $7, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_7: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_7: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_7: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_7: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 7 ret i64 %mul } @@ -133,10 +372,43 @@ define i64 @test_mul_by_8(i64 %x) { ; X86-NEXT: shll $3, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_8: -; X64: # BB#0: -; X64-NEXT: leaq (,%rdi,8), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_8: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_8: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_8: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $3, %eax, %edx +; X86-NOOPT-NEXT: shll $3, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_8: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_8: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_8: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_8: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 8 ret i64 %mul } @@ -150,10 +422,43 @@ define i64 @test_mul_by_9(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_9: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,8), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_9: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_9: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_9: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $9, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_9: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_9: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_9: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_9: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 9 ret i64 %mul } @@ -168,11 +473,46 @@ define i64 @test_mul_by_10(i64 %x) { ; X86-NEXT: leal (%edx,%ecx,2), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_10: -; X64: # BB#0: -; X64-NEXT: addq %rdi, %rdi -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_10: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_10: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_10: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $10, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_10: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_10: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_10: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_10: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 10 ret i64 %mul } @@ -180,16 +520,53 @@ define i64 @test_mul_by_10(i64 %x) { define i64 @test_mul_by_11(i64 %x) { ; X86-LABEL: test_mul_by_11: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %ecx ; X86-NEXT: movl $11, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $11, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_11: -; X64: # BB#0: -; X64-NEXT: imulq $11, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_11: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_11: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_11: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $11, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_11: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_11: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_11: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_11: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 11 ret i64 %mul } @@ -204,11 +581,46 @@ define i64 @test_mul_by_12(i64 %x) { ; X86-NEXT: leal (%edx,%ecx,4), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_12: -; X64: # BB#0: -; X64-NEXT: shlq $2, %rdi -; X64-NEXT: leaq (%rdi,%rdi,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_12: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_12: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_12: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $12, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_12: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_12: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_12: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_12: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 12 ret i64 %mul } @@ -216,16 +628,53 @@ define i64 @test_mul_by_12(i64 %x) { define i64 @test_mul_by_13(i64 %x) { ; X86-LABEL: test_mul_by_13: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx ; X86-NEXT: movl $13, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $13, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_13: -; X64: # BB#0: -; X64-NEXT: imulq $13, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_13: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_13: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_13: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $13, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_13: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_13: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_13: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_13: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 13 ret i64 %mul } @@ -233,16 +682,56 @@ define i64 @test_mul_by_13(i64 %x) { define i64 @test_mul_by_14(i64 %x) { ; X86-LABEL: test_mul_by_14: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl $14, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $14, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_14: -; X64: # BB#0: -; X64-NEXT: imulq $14, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_14: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_14: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_14: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $14, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_14: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_14: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_14: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_14: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 14 ret i64 %mul } @@ -258,11 +747,46 @@ define i64 @test_mul_by_15(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_15: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: leaq (%rax,%rax,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_15: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_15: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_15: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $15, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_15: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_15: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_15: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_15: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 15 ret i64 %mul } @@ -276,11 +800,49 @@ define i64 @test_mul_by_16(i64 %x) { ; X86-NEXT: shll $4, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_16: -; X64: # BB#0: -; X64-NEXT: shlq $4, %rdi -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_16: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_16: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $4, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_16: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $4, %eax, %edx +; X86-NOOPT-NEXT: shll $4, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_16: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] +; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_16: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] +; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_16: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $4, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_16: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: shlq $4, %rdi # sched: [1:1.00] +; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 16 ret i64 %mul } @@ -297,12 +859,49 @@ define i64 @test_mul_by_17(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_17: -; X64: # BB#0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shlq $4, %rax -; X64-NEXT: leaq (%rax,%rdi), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_17: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_17: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_17: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $17, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_17: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_17: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_17: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00] +; X64-SLM-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_17: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 17 ret i64 %mul } @@ -317,11 +916,46 @@ define i64 @test_mul_by_18(i64 %x) { ; X86-NEXT: leal (%edx,%ecx,2), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_18: -; X64: # BB#0: -; X64-NEXT: addq %rdi, %rdi -; X64-NEXT: leaq (%rdi,%rdi,8), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_18: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_18: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_18: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $18, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_18: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_18: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_18: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50] +; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_18: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 18 ret i64 %mul } @@ -329,16 +963,56 @@ define i64 @test_mul_by_18(i64 %x) { define i64 @test_mul_by_19(i64 %x) { ; X86-LABEL: test_mul_by_19: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: shll $2, %ecx +; X86-NEXT: subl %eax, %ecx ; X86-NEXT: movl $19, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $19, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_19: -; X64: # BB#0: -; X64-NEXT: imulq $19, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_19: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_19: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: shlq $2, %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_19: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $19, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_19: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_19: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_19: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_19: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 19 ret i64 %mul } @@ -353,11 +1027,46 @@ define i64 @test_mul_by_20(i64 %x) { ; X86-NEXT: leal (%edx,%ecx,4), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_20: -; X64: # BB#0: -; X64-NEXT: shlq $2, %rdi -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_20: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_20: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_20: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $20, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_20: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_20: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_20: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_20: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 20 ret i64 %mul } @@ -365,16 +1074,53 @@ define i64 @test_mul_by_20(i64 %x) { define i64 @test_mul_by_21(i64 %x) { ; X86-LABEL: test_mul_by_21: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx ; X86-NEXT: movl $21, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $21, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_21: -; X64: # BB#0: -; X64-NEXT: imulq $21, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_21: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_21: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_21: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $21, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_21: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_21: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_21: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_21: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 21 ret i64 %mul } @@ -382,16 +1128,56 @@ define i64 @test_mul_by_21(i64 %x) { define i64 @test_mul_by_22(i64 %x) { ; X86-LABEL: test_mul_by_22: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %ecx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl $22, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $22, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_22: -; X64: # BB#0: -; X64-NEXT: imulq $22, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_22: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_22: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_22: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $22, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_22: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_22: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_22: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_22: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 22 ret i64 %mul } @@ -399,16 +1185,56 @@ define i64 @test_mul_by_22(i64 %x) { define i64 @test_mul_by_23(i64 %x) { ; X86-LABEL: test_mul_by_23: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: shll $3, %ecx +; X86-NEXT: subl %eax, %ecx ; X86-NEXT: movl $23, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $23, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_23: -; X64: # BB#0: -; X64-NEXT: imulq $23, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_23: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_23: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: shlq $3, %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_23: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $23, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_23: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_23: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_23: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_23: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 23 ret i64 %mul } @@ -423,11 +1249,46 @@ define i64 @test_mul_by_24(i64 %x) { ; X86-NEXT: leal (%edx,%ecx,8), %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_24: -; X64: # BB#0: -; X64-NEXT: shlq $3, %rdi -; X64-NEXT: leaq (%rdi,%rdi,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_24: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_24: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $3, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_24: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $24, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_24: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_24: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_24: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $3, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_24: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 24 ret i64 %mul } @@ -443,11 +1304,46 @@ define i64 @test_mul_by_25(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_25: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,4), %rax -; X64-NEXT: leaq (%rax,%rax,4), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_25: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_25: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_25: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $25, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_25: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_25: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_25: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_25: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 25 ret i64 %mul } @@ -455,16 +1351,56 @@ define i64 @test_mul_by_25(i64 %x) { define i64 @test_mul_by_26(i64 %x) { ; X86-LABEL: test_mul_by_26: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: subl %eax, %ecx ; X86-NEXT: movl $26, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $26, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_26: -; X64: # BB#0: -; X64-NEXT: imulq $26, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_26: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_26: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_26: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $26, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_26: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_26: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_26: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_26: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 26 ret i64 %mul } @@ -480,11 +1416,46 @@ define i64 @test_mul_by_27(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_27: -; X64: # BB#0: -; X64-NEXT: leaq (%rdi,%rdi,8), %rax -; X64-NEXT: leaq (%rax,%rax,2), %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_27: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_27: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_27: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $27, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_27: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_27: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_27: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00] +; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_27: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 27 ret i64 %mul } @@ -492,16 +1463,56 @@ define i64 @test_mul_by_27(i64 %x) { define i64 @test_mul_by_28(i64 %x) { ; X86-LABEL: test_mul_by_28: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl $28, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $28, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_28: -; X64: # BB#0: -; X64-NEXT: imulq $28, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_28: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_28: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_28: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $28, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_28: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_28: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_28: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_28: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 28 ret i64 %mul } @@ -509,16 +1520,59 @@ define i64 @test_mul_by_28(i64 %x) { define i64 @test_mul_by_29(i64 %x) { ; X86-LABEL: test_mul_by_29: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: addl %eax, %ecx ; X86-NEXT: movl $29, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $29, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_29: -; X64: # BB#0: -; X64-NEXT: imulq $29, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_29: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_29: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] +; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_29: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $29, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_29: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_29: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_29: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_29: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 29 ret i64 %mul } @@ -526,16 +1580,59 @@ define i64 @test_mul_by_29(i64 %x) { define i64 @test_mul_by_30(i64 %x) { ; X86-LABEL: test_mul_by_30: ; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $5, %ecx +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: subl %eax, %ecx ; X86-NEXT: movl $30, %eax ; X86-NEXT: mull {{[0-9]+}}(%esp) -; X86-NEXT: imull $30, {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_30: -; X64: # BB#0: -; X64-NEXT: imulq $30, %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_30: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_30: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_30: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $30, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_30: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_30: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_30: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_30: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 30 ret i64 %mul } @@ -552,12 +1649,49 @@ define i64 @test_mul_by_31(i64 %x) { ; X86-NEXT: addl %ecx, %edx ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_31: -; X64: # BB#0: -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: shlq $5, %rax -; X64-NEXT: subq %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_31: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] +; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_31: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50] +; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_31: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl $31, %eax +; X86-NOOPT-NEXT: mull {{[0-9]+}}(%esp) +; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_31: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_31: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_31: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00] +; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_31: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 31 ret i64 %mul } @@ -571,11 +1705,168 @@ define i64 @test_mul_by_32(i64 %x) { ; X86-NEXT: shll $5, %eax ; X86-NEXT: retl ; -; X64-LABEL: test_mul_by_32: -; X64: # BB#0: -; X64-NEXT: shlq $5, %rdi -; X64-NEXT: movq %rdi, %rax -; X64-NEXT: retq +; X64-HSW-LABEL: test_mul_by_32: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50] +; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_by_32: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: shlq $5, %rdi # sched: [1:0.50] +; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.17] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_by_32: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOOPT-NEXT: shldl $5, %eax, %edx +; X86-NOOPT-NEXT: shll $5, %eax +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_by_32: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] +; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_by_32: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] +; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.17] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_by_32: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: shlq $5, %rdi # sched: [1:1.00] +; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_by_32: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: shlq $5, %rdi # sched: [1:1.00] +; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] %mul = mul nsw i64 %x, 32 ret i64 %mul } + +; (x*9+42)*(x*5+2) +define i64 @test_mul_spec(i64 %x) nounwind { +; X86-LABEL: test_mul_spec: +; X86: # BB#0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl $9, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: leal (%edi,%edi,8), %ebx +; X86-NEXT: addl $42, %esi +; X86-NEXT: adcl %edx, %ebx +; X86-NEXT: movl $5, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: mull %edx +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: leal (%edi,%edi,4), %edi +; X86-NEXT: addl $2, %ecx +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %ecx +; X86-NEXT: imull %esi, %edi +; X86-NEXT: addl %edi, %edx +; X86-NEXT: imull %ebx, %ecx +; X86-NEXT: addl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-HSW-LABEL: test_mul_spec: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50] +; X64-HSW-NEXT: addq $42, %rcx # sched: [1:0.25] +; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25] +; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] +; +; X64-JAG-LABEL: test_mul_spec: +; X64-JAG: # BB#0: +; X64-JAG-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50] +; X64-JAG-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50] +; X64-JAG-NEXT: imulq %rcx, %rax # sched: [3:1.00] +; X64-JAG-NEXT: retq # sched: [4:1.00] +; +; X86-NOOPT-LABEL: test_mul_spec: +; X86-NOOPT: # BB#0: +; X86-NOOPT-NEXT: pushl %ebx +; X86-NOOPT-NEXT: pushl %edi +; X86-NOOPT-NEXT: pushl %esi +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOOPT-NEXT: movl $9, %edx +; X86-NOOPT-NEXT: movl %ecx, %eax +; X86-NOOPT-NEXT: mull %edx +; X86-NOOPT-NEXT: movl %eax, %esi +; X86-NOOPT-NEXT: leal (%edi,%edi,8), %ebx +; X86-NOOPT-NEXT: addl $42, %esi +; X86-NOOPT-NEXT: adcl %edx, %ebx +; X86-NOOPT-NEXT: movl $5, %edx +; X86-NOOPT-NEXT: movl %ecx, %eax +; X86-NOOPT-NEXT: mull %edx +; X86-NOOPT-NEXT: movl %eax, %ecx +; X86-NOOPT-NEXT: leal (%edi,%edi,4), %edi +; X86-NOOPT-NEXT: addl $2, %ecx +; X86-NOOPT-NEXT: adcl %edx, %edi +; X86-NOOPT-NEXT: movl %esi, %eax +; X86-NOOPT-NEXT: mull %ecx +; X86-NOOPT-NEXT: imull %esi, %edi +; X86-NOOPT-NEXT: addl %edi, %edx +; X86-NOOPT-NEXT: imull %ebx, %ecx +; X86-NOOPT-NEXT: addl %ecx, %edx +; X86-NOOPT-NEXT: popl %esi +; X86-NOOPT-NEXT: popl %edi +; X86-NOOPT-NEXT: popl %ebx +; X86-NOOPT-NEXT: retl +; +; HSW-NOOPT-LABEL: test_mul_spec: +; HSW-NOOPT: # BB#0: +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50] +; HSW-NOOPT-NEXT: addq $42, %rcx # sched: [1:0.25] +; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] +; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25] +; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; +; JAG-NOOPT-LABEL: test_mul_spec: +; JAG-NOOPT: # BB#0: +; JAG-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:0.50] +; JAG-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:0.50] +; JAG-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00] +; JAG-NOOPT-NEXT: retq # sched: [4:1.00] +; +; X64-SLM-LABEL: test_mul_spec: +; X64-SLM: # BB#0: +; X64-SLM-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00] +; X64-SLM-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00] +; X64-SLM-NEXT: imulq %rcx, %rax # sched: [3:1.00] +; X64-SLM-NEXT: retq # sched: [4:1.00] +; +; SLM-NOOPT-LABEL: test_mul_spec: +; SLM-NOOPT: # BB#0: +; SLM-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00] +; SLM-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00] +; SLM-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00] +; SLM-NOOPT-NEXT: retq # sched: [4:1.00] + %mul = mul nsw i64 %x, 9 + %add = add nsw i64 %mul, 42 + %mul2 = mul nsw i64 %x, 5 + %add2 = add nsw i64 %mul2, 2 + %mul3 = mul nsw i64 %add, %add2 + ret i64 %mul3 +} diff --git a/test/CodeGen/X86/mul-constant-result.ll b/test/CodeGen/X86/mul-constant-result.ll new file mode 100644 index 000000000000..65d80a699e24 --- /dev/null +++ b/test/CodeGen/X86/mul-constant-result.ll @@ -0,0 +1,1291 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @mult(i32, i32) local_unnamed_addr #0 { +; X86-LABEL: mult: +; X86: # BB#0: +; X86-NEXT: pushl %esi +; X86-NEXT: .Lcfi0: +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .Lcfi1: +; X86-NEXT: .cfi_offset %esi, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: cmpl $1, %edx +; X86-NEXT: movl $1, %eax +; X86-NEXT: movl $1, %esi +; X86-NEXT: jg .LBB0_2 +; X86-NEXT: # BB#1: +; X86-NEXT: movl %edx, %esi +; X86-NEXT: .LBB0_2: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: testl %edx, %edx +; X86-NEXT: je .LBB0_4 +; X86-NEXT: # BB#3: +; X86-NEXT: movl %esi, %eax +; X86-NEXT: .LBB0_4: +; X86-NEXT: decl %ecx +; X86-NEXT: cmpl $31, %ecx +; X86-NEXT: ja .LBB0_39 +; X86-NEXT: # BB#5: +; X86-NEXT: jmpl *.LJTI0_0(,%ecx,4) +; X86-NEXT: .LBB0_6: +; X86-NEXT: addl %eax, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_39: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: .LBB0_40: +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_7: +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_8: +; X86-NEXT: shll $2, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_9: +; X86-NEXT: leal (%eax,%eax,4), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_10: +; X86-NEXT: addl %eax, %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_11: +; X86-NEXT: leal (,%eax,8), %ecx +; X86-NEXT: jmp .LBB0_12 +; X86-NEXT: .LBB0_13: +; X86-NEXT: shll $3, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_14: +; X86-NEXT: leal (%eax,%eax,8), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_15: +; X86-NEXT: addl %eax, %eax +; X86-NEXT: leal (%eax,%eax,4), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_16: +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_17: +; X86-NEXT: shll $2, %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_18: +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_19: +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: jmp .LBB0_20 +; X86-NEXT: .LBB0_21: +; X86-NEXT: leal (%eax,%eax,4), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_22: +; X86-NEXT: shll $4, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_23: +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $4, %ecx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_24: +; X86-NEXT: addl %eax, %eax +; X86-NEXT: leal (%eax,%eax,8), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_25: +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: shll $2, %ecx +; X86-NEXT: jmp .LBB0_12 +; X86-NEXT: .LBB0_26: +; X86-NEXT: shll $2, %eax +; X86-NEXT: leal (%eax,%eax,4), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_27: +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_28: +; X86-NEXT: leal (%eax,%eax,4), %ecx +; X86-NEXT: .LBB0_20: +; X86-NEXT: leal (%eax,%ecx,4), %ecx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_29: +; X86-NEXT: leal (%eax,%eax,2), %ecx +; X86-NEXT: shll $3, %ecx +; X86-NEXT: jmp .LBB0_12 +; X86-NEXT: .LBB0_30: +; X86-NEXT: shll $3, %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_31: +; X86-NEXT: leal (%eax,%eax,4), %eax +; X86-NEXT: leal (%eax,%eax,4), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_32: +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: jmp .LBB0_12 +; X86-NEXT: .LBB0_33: +; X86-NEXT: leal (%eax,%eax,8), %eax +; X86-NEXT: leal (%eax,%eax,2), %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_34: +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_35: +; X86-NEXT: leal (%eax,%eax,8), %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: addl %eax, %ecx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_36: +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $5, %ecx +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: jmp .LBB0_12 +; X86-NEXT: .LBB0_37: +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shll $5, %ecx +; X86-NEXT: .LBB0_12: +; X86-NEXT: subl %eax, %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; X86-NEXT: .LBB0_38: +; X86-NEXT: shll $5, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl +; +; X64-HSW-LABEL: mult: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def> +; X64-HSW-NEXT: cmpl $1, %esi +; X64-HSW-NEXT: movl $1, %ecx +; X64-HSW-NEXT: movl %esi, %eax +; X64-HSW-NEXT: cmovgl %ecx, %eax +; X64-HSW-NEXT: testl %esi, %esi +; X64-HSW-NEXT: cmovel %ecx, %eax +; X64-HSW-NEXT: addl $-1, %edi +; X64-HSW-NEXT: cmpl $31, %edi +; X64-HSW-NEXT: ja .LBB0_36 +; X64-HSW-NEXT: # BB#1: +; X64-HSW-NEXT: jmpq *.LJTI0_0(,%rdi,8) +; X64-HSW-NEXT: .LBB0_2: +; X64-HSW-NEXT: addl %eax, %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_36: +; X64-HSW-NEXT: xorl %eax, %eax +; X64-HSW-NEXT: .LBB0_37: +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_3: +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_4: +; X64-HSW-NEXT: shll $2, %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_5: +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_6: +; X64-HSW-NEXT: addl %eax, %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_7: +; X64-HSW-NEXT: leal (,%rax,8), %ecx +; X64-HSW-NEXT: jmp .LBB0_8 +; X64-HSW-NEXT: .LBB0_9: +; X64-HSW-NEXT: shll $3, %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_10: +; X64-HSW-NEXT: leal (%rax,%rax,8), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_11: +; X64-HSW-NEXT: addl %eax, %eax +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_12: +; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx +; X64-HSW-NEXT: leal (%rax,%rcx,2), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_13: +; X64-HSW-NEXT: shll $2, %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_14: +; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx +; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_15: +; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx +; X64-HSW-NEXT: jmp .LBB0_16 +; X64-HSW-NEXT: .LBB0_18: +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_19: +; X64-HSW-NEXT: shll $4, %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_20: +; X64-HSW-NEXT: movl %eax, %ecx +; X64-HSW-NEXT: shll $4, %ecx +; X64-HSW-NEXT: jmp .LBB0_17 +; X64-HSW-NEXT: .LBB0_21: +; X64-HSW-NEXT: addl %eax, %eax +; X64-HSW-NEXT: leal (%rax,%rax,8), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_22: +; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx +; X64-HSW-NEXT: shll $2, %ecx +; X64-HSW-NEXT: jmp .LBB0_8 +; X64-HSW-NEXT: .LBB0_23: +; X64-HSW-NEXT: shll $2, %eax +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_24: +; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx +; X64-HSW-NEXT: leal (%rax,%rcx,4), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_25: +; X64-HSW-NEXT: leal (%rax,%rax,4), %ecx +; X64-HSW-NEXT: .LBB0_16: +; X64-HSW-NEXT: leal (%rax,%rcx,4), %ecx +; X64-HSW-NEXT: jmp .LBB0_17 +; X64-HSW-NEXT: .LBB0_26: +; X64-HSW-NEXT: leal (%rax,%rax,2), %ecx +; X64-HSW-NEXT: shll $3, %ecx +; X64-HSW-NEXT: jmp .LBB0_8 +; X64-HSW-NEXT: .LBB0_27: +; X64-HSW-NEXT: shll $3, %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_28: +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: leal (%rax,%rax,4), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_29: +; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx +; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx +; X64-HSW-NEXT: jmp .LBB0_8 +; X64-HSW-NEXT: .LBB0_30: +; X64-HSW-NEXT: leal (%rax,%rax,8), %eax +; X64-HSW-NEXT: leal (%rax,%rax,2), %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_31: +; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx +; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx +; X64-HSW-NEXT: jmp .LBB0_17 +; X64-HSW-NEXT: .LBB0_32: +; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx +; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx +; X64-HSW-NEXT: addl %eax, %ecx +; X64-HSW-NEXT: .LBB0_17: +; X64-HSW-NEXT: addl %eax, %ecx +; X64-HSW-NEXT: movl %ecx, %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_33: +; X64-HSW-NEXT: movl %eax, %ecx +; X64-HSW-NEXT: shll $5, %ecx +; X64-HSW-NEXT: subl %eax, %ecx +; X64-HSW-NEXT: jmp .LBB0_8 +; X64-HSW-NEXT: .LBB0_34: +; X64-HSW-NEXT: movl %eax, %ecx +; X64-HSW-NEXT: shll $5, %ecx +; X64-HSW-NEXT: .LBB0_8: +; X64-HSW-NEXT: subl %eax, %ecx +; X64-HSW-NEXT: movl %ecx, %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq +; X64-HSW-NEXT: .LBB0_35: +; X64-HSW-NEXT: shll $5, %eax +; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> +; X64-HSW-NEXT: retq + %3 = icmp eq i32 %1, 0 + %4 = icmp sgt i32 %1, 1 + %5 = or i1 %3, %4 + %6 = select i1 %5, i32 1, i32 %1 + switch i32 %0, label %69 [ + i32 1, label %70 + i32 2, label %7 + i32 3, label %9 + i32 4, label %11 + i32 5, label %13 + i32 6, label %15 + i32 7, label %17 + i32 8, label %19 + i32 9, label %21 + i32 10, label %23 + i32 11, label %25 + i32 12, label %27 + i32 13, label %29 + i32 14, label %31 + i32 15, label %33 + i32 16, label %35 + i32 17, label %37 + i32 18, label %39 + i32 19, label %41 + i32 20, label %43 + i32 21, label %45 + i32 22, label %47 + i32 23, label %49 + i32 24, label %51 + i32 25, label %53 + i32 26, label %55 + i32 27, label %57 + i32 28, label %59 + i32 29, label %61 + i32 30, label %63 + i32 31, label %65 + i32 32, label %67 + ] + +; <label>:7: ; preds = %2 + %8 = shl nsw i32 %6, 1 + br label %70 + +; <label>:9: ; preds = %2 + %10 = mul nsw i32 %6, 3 + br label %70 + +; <label>:11: ; preds = %2 + %12 = shl nsw i32 %6, 2 + br label %70 + +; <label>:13: ; preds = %2 + %14 = mul nsw i32 %6, 5 + br label %70 + +; <label>:15: ; preds = %2 + %16 = mul nsw i32 %6, 6 + br label %70 + +; <label>:17: ; preds = %2 + %18 = mul nsw i32 %6, 7 + br label %70 + +; <label>:19: ; preds = %2 + %20 = shl nsw i32 %6, 3 + br label %70 + +; <label>:21: ; preds = %2 + %22 = mul nsw i32 %6, 9 + br label %70 + +; <label>:23: ; preds = %2 + %24 = mul nsw i32 %6, 10 + br label %70 + +; <label>:25: ; preds = %2 + %26 = mul nsw i32 %6, 11 + br label %70 + +; <label>:27: ; preds = %2 + %28 = mul nsw i32 %6, 12 + br label %70 + +; <label>:29: ; preds = %2 + %30 = mul nsw i32 %6, 13 + br label %70 + +; <label>:31: ; preds = %2 + %32 = mul nsw i32 %6, 14 + br label %70 + +; <label>:33: ; preds = %2 + %34 = mul nsw i32 %6, 15 + br label %70 + +; <label>:35: ; preds = %2 + %36 = shl nsw i32 %6, 4 + br label %70 + +; <label>:37: ; preds = %2 + %38 = mul nsw i32 %6, 17 + br label %70 + +; <label>:39: ; preds = %2 + %40 = mul nsw i32 %6, 18 + br label %70 + +; <label>:41: ; preds = %2 + %42 = mul nsw i32 %6, 19 + br label %70 + +; <label>:43: ; preds = %2 + %44 = mul nsw i32 %6, 20 + br label %70 + +; <label>:45: ; preds = %2 + %46 = mul nsw i32 %6, 21 + br label %70 + +; <label>:47: ; preds = %2 + %48 = mul nsw i32 %6, 22 + br label %70 + +; <label>:49: ; preds = %2 + %50 = mul nsw i32 %6, 23 + br label %70 + +; <label>:51: ; preds = %2 + %52 = mul nsw i32 %6, 24 + br label %70 + +; <label>:53: ; preds = %2 + %54 = mul nsw i32 %6, 25 + br label %70 + +; <label>:55: ; preds = %2 + %56 = mul nsw i32 %6, 26 + br label %70 + +; <label>:57: ; preds = %2 + %58 = mul nsw i32 %6, 27 + br label %70 + +; <label>:59: ; preds = %2 + %60 = mul nsw i32 %6, 28 + br label %70 + +; <label>:61: ; preds = %2 + %62 = mul nsw i32 %6, 29 + br label %70 + +; <label>:63: ; preds = %2 + %64 = mul nsw i32 %6, 30 + br label %70 + +; <label>:65: ; preds = %2 + %66 = mul nsw i32 %6, 31 + br label %70 + +; <label>:67: ; preds = %2 + %68 = shl nsw i32 %6, 5 + br label %70 + +; <label>:69: ; preds = %2 + br label %70 + +; <label>:70: ; preds = %2, %69, %67, %65, %63, %61, %59, %57, %55, %53, %51, %49, %47, %45, %43, %41, %39, %37, %35, %33, %31, %29, %27, %25, %23, %21, %19, %17, %15, %13, %11, %9, %7 + %71 = phi i32 [ %8, %7 ], [ %10, %9 ], [ %12, %11 ], [ %14, %13 ], [ %16, %15 ], [ %18, %17 ], [ %20, %19 ], [ %22, %21 ], [ %24, %23 ], [ %26, %25 ], [ %28, %27 ], [ %30, %29 ], [ %32, %31 ], [ %34, %33 ], [ %36, %35 ], [ %38, %37 ], [ %40, %39 ], [ %42, %41 ], [ %44, %43 ], [ %46, %45 ], [ %48, %47 ], [ %50, %49 ], [ %52, %51 ], [ %54, %53 ], [ %56, %55 ], [ %58, %57 ], [ %60, %59 ], [ %62, %61 ], [ %64, %63 ], [ %66, %65 ], [ %68, %67 ], [ 0, %69 ], [ %6, %2 ] + ret i32 %71 +} + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @foo() local_unnamed_addr #0 { +; X86-LABEL: foo: +; X86: # BB#0: +; X86-NEXT: pushl %ebx +; X86-NEXT: .Lcfi2: +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %edi +; X86-NEXT: .Lcfi3: +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %esi +; X86-NEXT: .Lcfi4: +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: .Lcfi5: +; X86-NEXT: .cfi_offset %esi, -16 +; X86-NEXT: .Lcfi6: +; X86-NEXT: .cfi_offset %edi, -12 +; X86-NEXT: .Lcfi7: +; X86-NEXT: .cfi_offset %ebx, -8 +; X86-NEXT: pushl $0 +; X86-NEXT: .Lcfi8: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $1 +; X86-NEXT: .Lcfi9: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi10: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %esi +; X86-NEXT: xorl $1, %esi +; X86-NEXT: pushl $1 +; X86-NEXT: .Lcfi11: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $2 +; X86-NEXT: .Lcfi12: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi13: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $2, %edi +; X86-NEXT: pushl $1 +; X86-NEXT: .Lcfi14: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $3 +; X86-NEXT: .Lcfi15: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi16: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $3, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $2 +; X86-NEXT: .Lcfi17: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $4 +; X86-NEXT: .Lcfi18: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi19: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $4, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $2 +; X86-NEXT: .Lcfi20: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $5 +; X86-NEXT: .Lcfi21: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi22: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $5, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $3 +; X86-NEXT: .Lcfi23: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $6 +; X86-NEXT: .Lcfi24: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi25: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $6, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $3 +; X86-NEXT: .Lcfi26: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $7 +; X86-NEXT: .Lcfi27: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi28: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $7, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $4 +; X86-NEXT: .Lcfi29: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $8 +; X86-NEXT: .Lcfi30: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi31: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $8, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $4 +; X86-NEXT: .Lcfi32: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $9 +; X86-NEXT: .Lcfi33: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi34: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $9, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $5 +; X86-NEXT: .Lcfi35: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $10 +; X86-NEXT: .Lcfi36: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi37: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $10, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $5 +; X86-NEXT: .Lcfi38: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $11 +; X86-NEXT: .Lcfi39: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi40: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $11, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $6 +; X86-NEXT: .Lcfi41: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $12 +; X86-NEXT: .Lcfi42: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi43: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $12, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $6 +; X86-NEXT: .Lcfi44: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $13 +; X86-NEXT: .Lcfi45: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi46: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $13, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $7 +; X86-NEXT: .Lcfi47: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $14 +; X86-NEXT: .Lcfi48: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi49: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $14, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $7 +; X86-NEXT: .Lcfi50: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $15 +; X86-NEXT: .Lcfi51: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi52: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $15, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $8 +; X86-NEXT: .Lcfi53: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $16 +; X86-NEXT: .Lcfi54: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi55: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $16, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $8 +; X86-NEXT: .Lcfi56: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $17 +; X86-NEXT: .Lcfi57: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi58: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $17, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $9 +; X86-NEXT: .Lcfi59: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $18 +; X86-NEXT: .Lcfi60: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi61: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $18, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $9 +; X86-NEXT: .Lcfi62: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $19 +; X86-NEXT: .Lcfi63: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi64: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $19, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $10 +; X86-NEXT: .Lcfi65: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $20 +; X86-NEXT: .Lcfi66: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi67: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $20, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $10 +; X86-NEXT: .Lcfi68: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $21 +; X86-NEXT: .Lcfi69: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi70: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $21, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $11 +; X86-NEXT: .Lcfi71: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $22 +; X86-NEXT: .Lcfi72: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi73: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $22, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $11 +; X86-NEXT: .Lcfi74: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $23 +; X86-NEXT: .Lcfi75: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi76: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $23, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $12 +; X86-NEXT: .Lcfi77: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $24 +; X86-NEXT: .Lcfi78: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi79: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $24, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $12 +; X86-NEXT: .Lcfi80: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $25 +; X86-NEXT: .Lcfi81: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi82: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $25, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $13 +; X86-NEXT: .Lcfi83: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $26 +; X86-NEXT: .Lcfi84: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi85: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $26, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $13 +; X86-NEXT: .Lcfi86: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $27 +; X86-NEXT: .Lcfi87: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi88: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $27, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $14 +; X86-NEXT: .Lcfi89: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $28 +; X86-NEXT: .Lcfi90: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi91: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $28, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $14 +; X86-NEXT: .Lcfi92: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $29 +; X86-NEXT: .Lcfi93: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi94: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $29, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: pushl $15 +; X86-NEXT: .Lcfi95: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $30 +; X86-NEXT: .Lcfi96: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi97: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl $30, %edi +; X86-NEXT: orl %ebx, %edi +; X86-NEXT: pushl $15 +; X86-NEXT: .Lcfi98: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $31 +; X86-NEXT: .Lcfi99: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi100: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl $31, %ebx +; X86-NEXT: orl %edi, %ebx +; X86-NEXT: orl %esi, %ebx +; X86-NEXT: pushl $16 +; X86-NEXT: .Lcfi101: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: pushl $32 +; X86-NEXT: .Lcfi102: +; X86-NEXT: .cfi_adjust_cfa_offset 4 +; X86-NEXT: calll mult +; X86-NEXT: addl $8, %esp +; X86-NEXT: .Lcfi103: +; X86-NEXT: .cfi_adjust_cfa_offset -8 +; X86-NEXT: xorl $32, %eax +; X86-NEXT: orl %ebx, %eax +; X86-NEXT: movl $-1, %eax +; X86-NEXT: jne .LBB1_2 +; X86-NEXT: # BB#1: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: .LBB1_2: +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-HSW-LABEL: foo: +; X64-HSW: # BB#0: +; X64-HSW-NEXT: pushq %rbp +; X64-HSW-NEXT: .Lcfi0: +; X64-HSW-NEXT: .cfi_def_cfa_offset 16 +; X64-HSW-NEXT: pushq %r15 +; X64-HSW-NEXT: .Lcfi1: +; X64-HSW-NEXT: .cfi_def_cfa_offset 24 +; X64-HSW-NEXT: pushq %r14 +; X64-HSW-NEXT: .Lcfi2: +; X64-HSW-NEXT: .cfi_def_cfa_offset 32 +; X64-HSW-NEXT: pushq %r12 +; X64-HSW-NEXT: .Lcfi3: +; X64-HSW-NEXT: .cfi_def_cfa_offset 40 +; X64-HSW-NEXT: pushq %rbx +; X64-HSW-NEXT: .Lcfi4: +; X64-HSW-NEXT: .cfi_def_cfa_offset 48 +; X64-HSW-NEXT: .Lcfi5: +; X64-HSW-NEXT: .cfi_offset %rbx, -48 +; X64-HSW-NEXT: .Lcfi6: +; X64-HSW-NEXT: .cfi_offset %r12, -40 +; X64-HSW-NEXT: .Lcfi7: +; X64-HSW-NEXT: .cfi_offset %r14, -32 +; X64-HSW-NEXT: .Lcfi8: +; X64-HSW-NEXT: .cfi_offset %r15, -24 +; X64-HSW-NEXT: .Lcfi9: +; X64-HSW-NEXT: .cfi_offset %rbp, -16 +; X64-HSW-NEXT: xorl %r12d, %r12d +; X64-HSW-NEXT: movl $1, %edi +; X64-HSW-NEXT: xorl %esi, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebx +; X64-HSW-NEXT: xorl $1, %ebx +; X64-HSW-NEXT: movl $2, %edi +; X64-HSW-NEXT: movl $1, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebp +; X64-HSW-NEXT: xorl $2, %ebp +; X64-HSW-NEXT: orl %ebx, %ebp +; X64-HSW-NEXT: movl $3, %edi +; X64-HSW-NEXT: movl $1, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %r14d +; X64-HSW-NEXT: xorl $3, %r14d +; X64-HSW-NEXT: movl $4, %edi +; X64-HSW-NEXT: movl $2, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebx +; X64-HSW-NEXT: xorl $4, %ebx +; X64-HSW-NEXT: orl %r14d, %ebx +; X64-HSW-NEXT: orl %ebp, %ebx +; X64-HSW-NEXT: movl $5, %edi +; X64-HSW-NEXT: movl $2, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %r14d +; X64-HSW-NEXT: xorl $5, %r14d +; X64-HSW-NEXT: movl $6, %edi +; X64-HSW-NEXT: movl $3, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebp +; X64-HSW-NEXT: xorl $6, %ebp +; X64-HSW-NEXT: orl %r14d, %ebp +; X64-HSW-NEXT: movl $7, %edi +; X64-HSW-NEXT: movl $3, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %r14d +; X64-HSW-NEXT: xorl $7, %r14d +; X64-HSW-NEXT: orl %ebp, %r14d +; X64-HSW-NEXT: orl %ebx, %r14d +; X64-HSW-NEXT: movl $8, %edi +; X64-HSW-NEXT: movl $4, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebx +; X64-HSW-NEXT: xorl $8, %ebx +; X64-HSW-NEXT: movl $9, %edi +; X64-HSW-NEXT: movl $4, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebp +; X64-HSW-NEXT: xorl $9, %ebp +; X64-HSW-NEXT: orl %ebx, %ebp +; X64-HSW-NEXT: movl $10, %edi +; X64-HSW-NEXT: movl $5, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebx +; X64-HSW-NEXT: xorl $10, %ebx +; X64-HSW-NEXT: orl %ebp, %ebx +; X64-HSW-NEXT: movl $11, %edi +; X64-HSW-NEXT: movl $5, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %r15d +; X64-HSW-NEXT: xorl $11, %r15d +; X64-HSW-NEXT: orl %ebx, %r15d +; X64-HSW-NEXT: orl %r14d, %r15d +; X64-HSW-NEXT: movl $12, %edi +; X64-HSW-NEXT: movl $6, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebx +; X64-HSW-NEXT: xorl $12, %ebx +; X64-HSW-NEXT: movl $13, %edi +; X64-HSW-NEXT: movl $6, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebp +; X64-HSW-NEXT: xorl $13, %ebp +; X64-HSW-NEXT: orl %ebx, %ebp +; X64-HSW-NEXT: movl $14, %edi +; X64-HSW-NEXT: movl $7, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebx +; X64-HSW-NEXT: xorl $14, %ebx +; X64-HSW-NEXT: orl %ebp, %ebx +; X64-HSW-NEXT: movl $15, %edi +; X64-HSW-NEXT: movl $7, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebp +; X64-HSW-NEXT: xorl $15, %ebp +; X64-HSW-NEXT: orl %ebx, %ebp +; X64-HSW-NEXT: movl $16, %edi +; X64-HSW-NEXT: movl $8, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %r14d +; X64-HSW-NEXT: xorl $16, %r14d +; X64-HSW-NEXT: orl %ebp, %r14d +; X64-HSW-NEXT: orl %r15d, %r14d +; X64-HSW-NEXT: movl $17, %edi +; X64-HSW-NEXT: movl $8, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebp +; X64-HSW-NEXT: xorl $17, %ebp +; X64-HSW-NEXT: movl $18, %edi +; X64-HSW-NEXT: movl $9, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebx +; X64-HSW-NEXT: xorl $18, %ebx +; X64-HSW-NEXT: orl %ebp, %ebx +; X64-HSW-NEXT: movl $19, %edi +; X64-HSW-NEXT: movl $9, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebp +; X64-HSW-NEXT: xorl $19, %ebp +; X64-HSW-NEXT: orl %ebx, %ebp +; X64-HSW-NEXT: movl $20, %edi +; X64-HSW-NEXT: movl $10, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebx +; X64-HSW-NEXT: xorl $20, %ebx +; X64-HSW-NEXT: orl %ebp, %ebx +; X64-HSW-NEXT: movl $21, %edi +; X64-HSW-NEXT: movl $10, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebp +; X64-HSW-NEXT: xorl $21, %ebp +; X64-HSW-NEXT: orl %ebx, %ebp +; X64-HSW-NEXT: movl $22, %edi +; X64-HSW-NEXT: movl $11, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %r15d +; X64-HSW-NEXT: xorl $22, %r15d +; X64-HSW-NEXT: orl %ebp, %r15d +; X64-HSW-NEXT: orl %r14d, %r15d +; X64-HSW-NEXT: movl $23, %edi +; X64-HSW-NEXT: movl $11, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebp +; X64-HSW-NEXT: xorl $23, %ebp +; X64-HSW-NEXT: movl $24, %edi +; X64-HSW-NEXT: movl $12, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebx +; X64-HSW-NEXT: xorl $24, %ebx +; X64-HSW-NEXT: orl %ebp, %ebx +; X64-HSW-NEXT: movl $25, %edi +; X64-HSW-NEXT: movl $12, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebp +; X64-HSW-NEXT: xorl $25, %ebp +; X64-HSW-NEXT: orl %ebx, %ebp +; X64-HSW-NEXT: movl $26, %edi +; X64-HSW-NEXT: movl $13, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebx +; X64-HSW-NEXT: xorl $26, %ebx +; X64-HSW-NEXT: orl %ebp, %ebx +; X64-HSW-NEXT: movl $27, %edi +; X64-HSW-NEXT: movl $13, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebp +; X64-HSW-NEXT: xorl $27, %ebp +; X64-HSW-NEXT: orl %ebx, %ebp +; X64-HSW-NEXT: movl $28, %edi +; X64-HSW-NEXT: movl $14, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebx +; X64-HSW-NEXT: xorl $28, %ebx +; X64-HSW-NEXT: orl %ebp, %ebx +; X64-HSW-NEXT: movl $29, %edi +; X64-HSW-NEXT: movl $14, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebp +; X64-HSW-NEXT: xorl $29, %ebp +; X64-HSW-NEXT: orl %ebx, %ebp +; X64-HSW-NEXT: orl %r15d, %ebp +; X64-HSW-NEXT: movl $30, %edi +; X64-HSW-NEXT: movl $15, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %r14d +; X64-HSW-NEXT: xorl $30, %r14d +; X64-HSW-NEXT: movl $31, %edi +; X64-HSW-NEXT: movl $15, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: movl %eax, %ebx +; X64-HSW-NEXT: xorl $31, %ebx +; X64-HSW-NEXT: orl %r14d, %ebx +; X64-HSW-NEXT: orl %ebp, %ebx +; X64-HSW-NEXT: movl $32, %edi +; X64-HSW-NEXT: movl $16, %esi +; X64-HSW-NEXT: callq mult +; X64-HSW-NEXT: xorl $32, %eax +; X64-HSW-NEXT: orl %ebx, %eax +; X64-HSW-NEXT: movl $-1, %eax +; X64-HSW-NEXT: cmovel %r12d, %eax +; X64-HSW-NEXT: popq %rbx +; X64-HSW-NEXT: popq %r12 +; X64-HSW-NEXT: popq %r14 +; X64-HSW-NEXT: popq %r15 +; X64-HSW-NEXT: popq %rbp +; X64-HSW-NEXT: retq + %1 = tail call i32 @mult(i32 1, i32 0) + %2 = icmp ne i32 %1, 1 + %3 = tail call i32 @mult(i32 2, i32 1) + %4 = icmp ne i32 %3, 2 + %5 = or i1 %2, %4 + %6 = tail call i32 @mult(i32 3, i32 1) + %7 = icmp ne i32 %6, 3 + %8 = or i1 %5, %7 + %9 = tail call i32 @mult(i32 4, i32 2) + %10 = icmp ne i32 %9, 4 + %11 = or i1 %8, %10 + %12 = tail call i32 @mult(i32 5, i32 2) + %13 = icmp ne i32 %12, 5 + %14 = or i1 %11, %13 + %15 = tail call i32 @mult(i32 6, i32 3) + %16 = icmp ne i32 %15, 6 + %17 = or i1 %14, %16 + %18 = tail call i32 @mult(i32 7, i32 3) + %19 = icmp ne i32 %18, 7 + %20 = or i1 %17, %19 + %21 = tail call i32 @mult(i32 8, i32 4) + %22 = icmp ne i32 %21, 8 + %23 = or i1 %20, %22 + %24 = tail call i32 @mult(i32 9, i32 4) + %25 = icmp ne i32 %24, 9 + %26 = or i1 %23, %25 + %27 = tail call i32 @mult(i32 10, i32 5) + %28 = icmp ne i32 %27, 10 + %29 = or i1 %26, %28 + %30 = tail call i32 @mult(i32 11, i32 5) + %31 = icmp ne i32 %30, 11 + %32 = or i1 %29, %31 + %33 = tail call i32 @mult(i32 12, i32 6) + %34 = icmp ne i32 %33, 12 + %35 = or i1 %32, %34 + %36 = tail call i32 @mult(i32 13, i32 6) + %37 = icmp ne i32 %36, 13 + %38 = or i1 %35, %37 + %39 = tail call i32 @mult(i32 14, i32 7) + %40 = icmp ne i32 %39, 14 + %41 = or i1 %38, %40 + %42 = tail call i32 @mult(i32 15, i32 7) + %43 = icmp ne i32 %42, 15 + %44 = or i1 %41, %43 + %45 = tail call i32 @mult(i32 16, i32 8) + %46 = icmp ne i32 %45, 16 + %47 = or i1 %44, %46 + %48 = tail call i32 @mult(i32 17, i32 8) + %49 = icmp ne i32 %48, 17 + %50 = or i1 %47, %49 + %51 = tail call i32 @mult(i32 18, i32 9) + %52 = icmp ne i32 %51, 18 + %53 = or i1 %50, %52 + %54 = tail call i32 @mult(i32 19, i32 9) + %55 = icmp ne i32 %54, 19 + %56 = or i1 %53, %55 + %57 = tail call i32 @mult(i32 20, i32 10) + %58 = icmp ne i32 %57, 20 + %59 = or i1 %56, %58 + %60 = tail call i32 @mult(i32 21, i32 10) + %61 = icmp ne i32 %60, 21 + %62 = or i1 %59, %61 + %63 = tail call i32 @mult(i32 22, i32 11) + %64 = icmp ne i32 %63, 22 + %65 = or i1 %62, %64 + %66 = tail call i32 @mult(i32 23, i32 11) + %67 = icmp ne i32 %66, 23 + %68 = or i1 %65, %67 + %69 = tail call i32 @mult(i32 24, i32 12) + %70 = icmp ne i32 %69, 24 + %71 = or i1 %68, %70 + %72 = tail call i32 @mult(i32 25, i32 12) + %73 = icmp ne i32 %72, 25 + %74 = or i1 %71, %73 + %75 = tail call i32 @mult(i32 26, i32 13) + %76 = icmp ne i32 %75, 26 + %77 = or i1 %74, %76 + %78 = tail call i32 @mult(i32 27, i32 13) + %79 = icmp ne i32 %78, 27 + %80 = or i1 %77, %79 + %81 = tail call i32 @mult(i32 28, i32 14) + %82 = icmp ne i32 %81, 28 + %83 = or i1 %80, %82 + %84 = tail call i32 @mult(i32 29, i32 14) + %85 = icmp ne i32 %84, 29 + %86 = or i1 %83, %85 + %87 = tail call i32 @mult(i32 30, i32 15) + %88 = icmp ne i32 %87, 30 + %89 = or i1 %86, %88 + %90 = tail call i32 @mult(i32 31, i32 15) + %91 = icmp ne i32 %90, 31 + %92 = or i1 %89, %91 + %93 = tail call i32 @mult(i32 32, i32 16) + %94 = icmp ne i32 %93, 32 + %95 = or i1 %92, %94 + %96 = sext i1 %95 to i32 + ret i32 %96 +} + +attributes #0 = { norecurse nounwind readnone uwtable } diff --git a/test/CodeGen/X86/nontemporal-loads.ll b/test/CodeGen/X86/nontemporal-loads.ll index eaab26ef9547..3c916fd38c6c 100644 --- a/test/CodeGen/X86/nontemporal-loads.ll +++ b/test/CodeGen/X86/nontemporal-loads.ll @@ -168,7 +168,9 @@ define <8 x float> @test_v8f32(<8 x float>* %src) { ; ; AVX1-LABEL: test_v8f32: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v8f32: @@ -199,7 +201,9 @@ define <8 x i32> @test_v8i32(<8 x i32>* %src) { ; ; AVX1-LABEL: test_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v8i32: @@ -240,7 +244,9 @@ define <4 x double> @test_v4f64(<4 x double>* %src) { ; ; AVX1-LABEL: test_v4f64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v4f64: @@ -271,7 +277,9 @@ define <4 x i64> @test_v4i64(<4 x i64>* %src) { ; ; AVX1-LABEL: test_v4i64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v4i64: @@ -302,7 +310,9 @@ define <16 x i16> @test_v16i16(<16 x i16>* %src) { ; ; AVX1-LABEL: test_v16i16: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v16i16: @@ -333,7 +343,9 @@ define <32 x i8> @test_v32i8(<32 x i8>* %src) { ; ; AVX1-LABEL: test_v32i8: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v32i8: @@ -370,8 +382,12 @@ define <16 x float> @test_v16f32(<16 x float>* %src) { ; ; AVX1-LABEL: test_v16f32: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v16f32: @@ -407,8 +423,12 @@ define <16 x i32> @test_v16i32(<16 x i32>* %src) { ; ; AVX1-LABEL: test_v16i32: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v16i32: @@ -444,8 +464,12 @@ define <8 x double> @test_v8f64(<8 x double>* %src) { ; ; AVX1-LABEL: test_v8f64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v8f64: @@ -481,8 +505,12 @@ define <8 x i64> @test_v8i64(<8 x i64>* %src) { ; ; AVX1-LABEL: test_v8i64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v8i64: @@ -518,8 +546,12 @@ define <32 x i16> @test_v32i16(<32 x i16>* %src) { ; ; AVX1-LABEL: test_v32i16: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v32i16: @@ -567,8 +599,12 @@ define <64 x i8> @test_v64i8(<64 x i8>* %src) { ; ; AVX1-LABEL: test_v64i8: ; AVX1: # BB#0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_v64i8: @@ -601,19 +637,27 @@ define <64 x i8> @test_v64i8(<64 x i8>* %src) { ; Check cases where the load would be folded. define <4 x float> @test_arg_v4f32(<4 x float> %arg, <4 x float>* %src) { -; SSE-LABEL: test_arg_v4f32: -; SSE: # BB#0: -; SSE-NEXT: addps (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v4f32: +; SSE2: # BB#0: +; SSE2-NEXT: addps (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v4f32: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa (%rdi), %xmm1 +; SSE41-NEXT: addps %xmm1, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_arg_v4f32: ; AVX: # BB#0: -; AVX-NEXT: vaddps (%rdi), %xmm0, %xmm0 +; AVX-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_arg_v4f32: ; AVX512: # BB#0: -; AVX512-NEXT: vaddps (%rdi), %xmm0, %xmm0 +; AVX512-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <4 x float>, <4 x float>* %src, align 16, !nontemporal !1 %2 = fadd <4 x float> %arg, %1 @@ -621,19 +665,27 @@ define <4 x float> @test_arg_v4f32(<4 x float> %arg, <4 x float>* %src) { } define <4 x i32> @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %src) { -; SSE-LABEL: test_arg_v4i32: -; SSE: # BB#0: -; SSE-NEXT: paddd (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v4i32: +; SSE2: # BB#0: +; SSE2-NEXT: paddd (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v4i32: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa (%rdi), %xmm1 +; SSE41-NEXT: paddd %xmm1, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_arg_v4i32: ; AVX: # BB#0: -; AVX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 +; AVX-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_arg_v4i32: ; AVX512: # BB#0: -; AVX512-NEXT: vpaddd (%rdi), %xmm0, %xmm0 +; AVX512-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <4 x i32>, <4 x i32>* %src, align 16, !nontemporal !1 %2 = add <4 x i32> %arg, %1 @@ -641,19 +693,27 @@ define <4 x i32> @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %src) { } define <2 x double> @test_arg_v2f64(<2 x double> %arg, <2 x double>* %src) { -; SSE-LABEL: test_arg_v2f64: -; SSE: # BB#0: -; SSE-NEXT: addpd (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v2f64: +; SSE2: # BB#0: +; SSE2-NEXT: addpd (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v2f64: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa (%rdi), %xmm1 +; SSE41-NEXT: addpd %xmm1, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_arg_v2f64: ; AVX: # BB#0: -; AVX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 +; AVX-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_arg_v2f64: ; AVX512: # BB#0: -; AVX512-NEXT: vaddpd (%rdi), %xmm0, %xmm0 +; AVX512-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <2 x double>, <2 x double>* %src, align 16, !nontemporal !1 %2 = fadd <2 x double> %arg, %1 @@ -661,19 +721,27 @@ define <2 x double> @test_arg_v2f64(<2 x double> %arg, <2 x double>* %src) { } define <2 x i64> @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %src) { -; SSE-LABEL: test_arg_v2i64: -; SSE: # BB#0: -; SSE-NEXT: paddq (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v2i64: +; SSE2: # BB#0: +; SSE2-NEXT: paddq (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v2i64: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa (%rdi), %xmm1 +; SSE41-NEXT: paddq %xmm1, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_arg_v2i64: ; AVX: # BB#0: -; AVX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 +; AVX-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_arg_v2i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpaddq (%rdi), %xmm0, %xmm0 +; AVX512-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <2 x i64>, <2 x i64>* %src, align 16, !nontemporal !1 %2 = add <2 x i64> %arg, %1 @@ -681,19 +749,27 @@ define <2 x i64> @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %src) { } define <8 x i16> @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %src) { -; SSE-LABEL: test_arg_v8i16: -; SSE: # BB#0: -; SSE-NEXT: paddw (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v8i16: +; SSE2: # BB#0: +; SSE2-NEXT: paddw (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v8i16: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa (%rdi), %xmm1 +; SSE41-NEXT: paddw %xmm1, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_arg_v8i16: ; AVX: # BB#0: -; AVX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 +; AVX-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_arg_v8i16: ; AVX512: # BB#0: -; AVX512-NEXT: vpaddw (%rdi), %xmm0, %xmm0 +; AVX512-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <8 x i16>, <8 x i16>* %src, align 16, !nontemporal !1 %2 = add <8 x i16> %arg, %1 @@ -701,19 +777,27 @@ define <8 x i16> @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %src) { } define <16 x i8> @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %src) { -; SSE-LABEL: test_arg_v16i8: -; SSE: # BB#0: -; SSE-NEXT: paddb (%rdi), %xmm0 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v16i8: +; SSE2: # BB#0: +; SSE2-NEXT: paddb (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v16i8: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa (%rdi), %xmm1 +; SSE41-NEXT: paddb %xmm1, %xmm0 +; SSE41-NEXT: retq ; ; AVX-LABEL: test_arg_v16i8: ; AVX: # BB#0: -; AVX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 +; AVX-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_arg_v16i8: ; AVX512: # BB#0: -; AVX512-NEXT: vpaddb (%rdi), %xmm0, %xmm0 +; AVX512-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = load <16 x i8>, <16 x i8>* %src, align 16, !nontemporal !1 %2 = add <16 x i8> %arg, %1 @@ -723,20 +807,38 @@ define <16 x i8> @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %src) { ; And now YMM versions. define <8 x float> @test_arg_v8f32(<8 x float> %arg, <8 x float>* %src) { -; SSE-LABEL: test_arg_v8f32: -; SSE: # BB#0: -; SSE-NEXT: addps (%rdi), %xmm0 -; SSE-NEXT: addps 16(%rdi), %xmm1 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v8f32: +; SSE2: # BB#0: +; SSE2-NEXT: addps (%rdi), %xmm0 +; SSE2-NEXT: addps 16(%rdi), %xmm1 +; SSE2-NEXT: retq ; -; AVX-LABEL: test_arg_v8f32: -; AVX: # BB#0: -; AVX-NEXT: vaddps (%rdi), %ymm0, %ymm0 -; AVX-NEXT: retq +; SSE41-LABEL: test_arg_v8f32: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa 16(%rdi), %xmm2 +; SSE41-NEXT: movntdqa (%rdi), %xmm3 +; SSE41-NEXT: addps %xmm3, %xmm0 +; SSE41-NEXT: addps %xmm2, %xmm1 +; SSE41-NEXT: retq +; +; AVX1-LABEL: test_arg_v8f32: +; AVX1: # BB#0: +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_arg_v8f32: +; AVX2: # BB#0: +; AVX2-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq ; ; AVX512-LABEL: test_arg_v8f32: ; AVX512: # BB#0: -; AVX512-NEXT: vaddps (%rdi), %ymm0, %ymm0 +; AVX512-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX512-NEXT: vaddps %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <8 x float>, <8 x float>* %src, align 32, !nontemporal !1 %2 = fadd <8 x float> %arg, %1 @@ -744,51 +846,90 @@ define <8 x float> @test_arg_v8f32(<8 x float> %arg, <8 x float>* %src) { } define <8 x i32> @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %src) { -; SSE-LABEL: test_arg_v8i32: -; SSE: # BB#0: -; SSE-NEXT: paddd (%rdi), %xmm0 -; SSE-NEXT: paddd 16(%rdi), %xmm1 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v8i32: +; SSE2: # BB#0: +; SSE2-NEXT: paddd (%rdi), %xmm0 +; SSE2-NEXT: paddd 16(%rdi), %xmm1 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v8i32: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa 16(%rdi), %xmm2 +; SSE41-NEXT: movntdqa (%rdi), %xmm3 +; SSE41-NEXT: paddd %xmm3, %xmm0 +; SSE41-NEXT: paddd %xmm2, %xmm1 +; SSE41-NEXT: retq ; ; AVX1-LABEL: test_arg_v8i32: ; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa (%rdi), %ymm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpaddd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_arg_v8i32: ; AVX2: # BB#0: -; AVX2-NEXT: vpaddd (%rdi), %ymm0, %ymm0 +; AVX2-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_arg_v8i32: -; AVX512: # BB#0: -; AVX512-NEXT: vpaddd (%rdi), %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_arg_v8i32: +; AVX512F: # BB#0: +; AVX512F-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX512F-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: test_arg_v8i32: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512VL-LABEL: test_arg_v8i32: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 +; AVX512VL-NEXT: retq %1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1 %2 = add <8 x i32> %arg, %1 ret <8 x i32> %2 } define <4 x double> @test_arg_v4f64(<4 x double> %arg, <4 x double>* %src) { -; SSE-LABEL: test_arg_v4f64: -; SSE: # BB#0: -; SSE-NEXT: addpd (%rdi), %xmm0 -; SSE-NEXT: addpd 16(%rdi), %xmm1 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v4f64: +; SSE2: # BB#0: +; SSE2-NEXT: addpd (%rdi), %xmm0 +; SSE2-NEXT: addpd 16(%rdi), %xmm1 +; SSE2-NEXT: retq ; -; AVX-LABEL: test_arg_v4f64: -; AVX: # BB#0: -; AVX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 -; AVX-NEXT: retq +; SSE41-LABEL: test_arg_v4f64: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa 16(%rdi), %xmm2 +; SSE41-NEXT: movntdqa (%rdi), %xmm3 +; SSE41-NEXT: addpd %xmm3, %xmm0 +; SSE41-NEXT: addpd %xmm2, %xmm1 +; SSE41-NEXT: retq +; +; AVX1-LABEL: test_arg_v4f64: +; AVX1: # BB#0: +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_arg_v4f64: +; AVX2: # BB#0: +; AVX2-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq ; ; AVX512-LABEL: test_arg_v4f64: ; AVX512: # BB#0: -; AVX512-NEXT: vaddpd (%rdi), %ymm0, %ymm0 +; AVX512-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX512-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <4 x double>, <4 x double>* %src, align 32, !nontemporal !1 %2 = fadd <4 x double> %arg, %1 @@ -796,30 +937,40 @@ define <4 x double> @test_arg_v4f64(<4 x double> %arg, <4 x double>* %src) { } define <4 x i64> @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %src) { -; SSE-LABEL: test_arg_v4i64: -; SSE: # BB#0: -; SSE-NEXT: paddq (%rdi), %xmm0 -; SSE-NEXT: paddq 16(%rdi), %xmm1 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v4i64: +; SSE2: # BB#0: +; SSE2-NEXT: paddq (%rdi), %xmm0 +; SSE2-NEXT: paddq 16(%rdi), %xmm1 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v4i64: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa 16(%rdi), %xmm2 +; SSE41-NEXT: movntdqa (%rdi), %xmm3 +; SSE41-NEXT: paddq %xmm3, %xmm0 +; SSE41-NEXT: paddq %xmm2, %xmm1 +; SSE41-NEXT: retq ; ; AVX1-LABEL: test_arg_v4i64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa (%rdi), %ymm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpaddq %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_arg_v4i64: ; AVX2: # BB#0: -; AVX2-NEXT: vpaddq (%rdi), %ymm0, %ymm0 +; AVX2-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_arg_v4i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpaddq (%rdi), %ymm0, %ymm0 +; AVX512-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <4 x i64>, <4 x i64>* %src, align 32, !nontemporal !1 %2 = add <4 x i64> %arg, %1 @@ -827,30 +978,40 @@ define <4 x i64> @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %src) { } define <16 x i16> @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %src) { -; SSE-LABEL: test_arg_v16i16: -; SSE: # BB#0: -; SSE-NEXT: paddw (%rdi), %xmm0 -; SSE-NEXT: paddw 16(%rdi), %xmm1 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v16i16: +; SSE2: # BB#0: +; SSE2-NEXT: paddw (%rdi), %xmm0 +; SSE2-NEXT: paddw 16(%rdi), %xmm1 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v16i16: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa 16(%rdi), %xmm2 +; SSE41-NEXT: movntdqa (%rdi), %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm0 +; SSE41-NEXT: paddw %xmm2, %xmm1 +; SSE41-NEXT: retq ; ; AVX1-LABEL: test_arg_v16i16: ; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa (%rdi), %ymm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpaddw %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpaddw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_arg_v16i16: ; AVX2: # BB#0: -; AVX2-NEXT: vpaddw (%rdi), %ymm0, %ymm0 +; AVX2-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_arg_v16i16: ; AVX512: # BB#0: -; AVX512-NEXT: vpaddw (%rdi), %ymm0, %ymm0 +; AVX512-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX512-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <16 x i16>, <16 x i16>* %src, align 32, !nontemporal !1 %2 = add <16 x i16> %arg, %1 @@ -858,30 +1019,40 @@ define <16 x i16> @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %src) { } define <32 x i8> @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %src) { -; SSE-LABEL: test_arg_v32i8: -; SSE: # BB#0: -; SSE-NEXT: paddb (%rdi), %xmm0 -; SSE-NEXT: paddb 16(%rdi), %xmm1 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v32i8: +; SSE2: # BB#0: +; SSE2-NEXT: paddb (%rdi), %xmm0 +; SSE2-NEXT: paddb 16(%rdi), %xmm1 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v32i8: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa 16(%rdi), %xmm2 +; SSE41-NEXT: movntdqa (%rdi), %xmm3 +; SSE41-NEXT: paddb %xmm3, %xmm0 +; SSE41-NEXT: paddb %xmm2, %xmm1 +; SSE41-NEXT: retq ; ; AVX1-LABEL: test_arg_v32i8: ; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa (%rdi), %ymm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_arg_v32i8: ; AVX2: # BB#0: -; AVX2-NEXT: vpaddb (%rdi), %ymm0, %ymm0 +; AVX2-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_arg_v32i8: ; AVX512: # BB#0: -; AVX512-NEXT: vpaddb (%rdi), %ymm0, %ymm0 +; AVX512-NEXT: vmovntdqa (%rdi), %ymm1 +; AVX512-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %1 = load <32 x i8>, <32 x i8>* %src, align 32, !nontemporal !1 %2 = add <32 x i8> %arg, %1 @@ -891,23 +1062,50 @@ define <32 x i8> @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %src) { ; And now ZMM versions. define <16 x float> @test_arg_v16f32(<16 x float> %arg, <16 x float>* %src) { -; SSE-LABEL: test_arg_v16f32: -; SSE: # BB#0: -; SSE-NEXT: addps (%rdi), %xmm0 -; SSE-NEXT: addps 16(%rdi), %xmm1 -; SSE-NEXT: addps 32(%rdi), %xmm2 -; SSE-NEXT: addps 48(%rdi), %xmm3 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v16f32: +; SSE2: # BB#0: +; SSE2-NEXT: addps (%rdi), %xmm0 +; SSE2-NEXT: addps 16(%rdi), %xmm1 +; SSE2-NEXT: addps 32(%rdi), %xmm2 +; SSE2-NEXT: addps 48(%rdi), %xmm3 +; SSE2-NEXT: retq ; -; AVX-LABEL: test_arg_v16f32: -; AVX: # BB#0: -; AVX-NEXT: vaddps (%rdi), %ymm0, %ymm0 -; AVX-NEXT: vaddps 32(%rdi), %ymm1, %ymm1 -; AVX-NEXT: retq +; SSE41-LABEL: test_arg_v16f32: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa 48(%rdi), %xmm4 +; SSE41-NEXT: movntdqa 32(%rdi), %xmm5 +; SSE41-NEXT: movntdqa 16(%rdi), %xmm6 +; SSE41-NEXT: movntdqa (%rdi), %xmm7 +; SSE41-NEXT: addps %xmm7, %xmm0 +; SSE41-NEXT: addps %xmm6, %xmm1 +; SSE41-NEXT: addps %xmm5, %xmm2 +; SSE41-NEXT: addps %xmm4, %xmm3 +; SSE41-NEXT: retq +; +; AVX1-LABEL: test_arg_v16f32: +; AVX1: # BB#0: +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm3 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm4 +; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 +; AVX1-NEXT: vaddps %ymm3, %ymm0, %ymm0 +; AVX1-NEXT: vaddps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_arg_v16f32: +; AVX2: # BB#0: +; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2 +; AVX2-NEXT: vmovntdqa (%rdi), %ymm3 +; AVX2-NEXT: vaddps %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: retq ; ; AVX512-LABEL: test_arg_v16f32: ; AVX512: # BB#0: -; AVX512-NEXT: vaddps (%rdi), %zmm0, %zmm0 +; AVX512-NEXT: vmovntdqa (%rdi), %zmm1 +; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: retq %1 = load <16 x float>, <16 x float>* %src, align 64, !nontemporal !1 %2 = fadd <16 x float> %arg, %1 @@ -915,39 +1113,54 @@ define <16 x float> @test_arg_v16f32(<16 x float> %arg, <16 x float>* %src) { } define <16 x i32> @test_arg_v16i32(<16 x i32> %arg, <16 x i32>* %src) { -; SSE-LABEL: test_arg_v16i32: -; SSE: # BB#0: -; SSE-NEXT: paddd (%rdi), %xmm0 -; SSE-NEXT: paddd 16(%rdi), %xmm1 -; SSE-NEXT: paddd 32(%rdi), %xmm2 -; SSE-NEXT: paddd 48(%rdi), %xmm3 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v16i32: +; SSE2: # BB#0: +; SSE2-NEXT: paddd (%rdi), %xmm0 +; SSE2-NEXT: paddd 16(%rdi), %xmm1 +; SSE2-NEXT: paddd 32(%rdi), %xmm2 +; SSE2-NEXT: paddd 48(%rdi), %xmm3 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v16i32: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa 48(%rdi), %xmm4 +; SSE41-NEXT: movntdqa 32(%rdi), %xmm5 +; SSE41-NEXT: movntdqa 16(%rdi), %xmm6 +; SSE41-NEXT: movntdqa (%rdi), %xmm7 +; SSE41-NEXT: paddd %xmm7, %xmm0 +; SSE41-NEXT: paddd %xmm6, %xmm1 +; SSE41-NEXT: paddd %xmm5, %xmm2 +; SSE41-NEXT: paddd %xmm4, %xmm3 +; SSE41-NEXT: retq ; ; AVX1-LABEL: test_arg_v16i32: ; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa (%rdi), %ymm2 -; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 -; AVX1-NEXT: vpaddd %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vpaddd %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm4 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 +; AVX1-NEXT: vpaddd %xmm5, %xmm6, %xmm5 +; AVX1-NEXT: vpaddd %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vpaddd %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_arg_v16i32: ; AVX2: # BB#0: -; AVX2-NEXT: vpaddd (%rdi), %ymm0, %ymm0 -; AVX2-NEXT: vpaddd 32(%rdi), %ymm1, %ymm1 +; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2 +; AVX2-NEXT: vmovntdqa (%rdi), %ymm3 +; AVX2-NEXT: vpaddd %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_arg_v16i32: ; AVX512: # BB#0: -; AVX512-NEXT: vpaddd (%rdi), %zmm0, %zmm0 +; AVX512-NEXT: vmovntdqa (%rdi), %zmm1 +; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: retq %1 = load <16 x i32>, <16 x i32>* %src, align 64, !nontemporal !1 %2 = add <16 x i32> %arg, %1 @@ -955,23 +1168,50 @@ define <16 x i32> @test_arg_v16i32(<16 x i32> %arg, <16 x i32>* %src) { } define <8 x double> @test_arg_v8f64(<8 x double> %arg, <8 x double>* %src) { -; SSE-LABEL: test_arg_v8f64: -; SSE: # BB#0: -; SSE-NEXT: addpd (%rdi), %xmm0 -; SSE-NEXT: addpd 16(%rdi), %xmm1 -; SSE-NEXT: addpd 32(%rdi), %xmm2 -; SSE-NEXT: addpd 48(%rdi), %xmm3 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v8f64: +; SSE2: # BB#0: +; SSE2-NEXT: addpd (%rdi), %xmm0 +; SSE2-NEXT: addpd 16(%rdi), %xmm1 +; SSE2-NEXT: addpd 32(%rdi), %xmm2 +; SSE2-NEXT: addpd 48(%rdi), %xmm3 +; SSE2-NEXT: retq ; -; AVX-LABEL: test_arg_v8f64: -; AVX: # BB#0: -; AVX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 -; AVX-NEXT: vaddpd 32(%rdi), %ymm1, %ymm1 -; AVX-NEXT: retq +; SSE41-LABEL: test_arg_v8f64: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa 48(%rdi), %xmm4 +; SSE41-NEXT: movntdqa 32(%rdi), %xmm5 +; SSE41-NEXT: movntdqa 16(%rdi), %xmm6 +; SSE41-NEXT: movntdqa (%rdi), %xmm7 +; SSE41-NEXT: addpd %xmm7, %xmm0 +; SSE41-NEXT: addpd %xmm6, %xmm1 +; SSE41-NEXT: addpd %xmm5, %xmm2 +; SSE41-NEXT: addpd %xmm4, %xmm3 +; SSE41-NEXT: retq +; +; AVX1-LABEL: test_arg_v8f64: +; AVX1: # BB#0: +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm3 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm4 +; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 +; AVX1-NEXT: vaddpd %ymm3, %ymm0, %ymm0 +; AVX1-NEXT: vaddpd %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_arg_v8f64: +; AVX2: # BB#0: +; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2 +; AVX2-NEXT: vmovntdqa (%rdi), %ymm3 +; AVX2-NEXT: vaddpd %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: retq ; ; AVX512-LABEL: test_arg_v8f64: ; AVX512: # BB#0: -; AVX512-NEXT: vaddpd (%rdi), %zmm0, %zmm0 +; AVX512-NEXT: vmovntdqa (%rdi), %zmm1 +; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: retq %1 = load <8 x double>, <8 x double>* %src, align 64, !nontemporal !1 %2 = fadd <8 x double> %arg, %1 @@ -979,39 +1219,54 @@ define <8 x double> @test_arg_v8f64(<8 x double> %arg, <8 x double>* %src) { } define <8 x i64> @test_arg_v8i64(<8 x i64> %arg, <8 x i64>* %src) { -; SSE-LABEL: test_arg_v8i64: -; SSE: # BB#0: -; SSE-NEXT: paddq (%rdi), %xmm0 -; SSE-NEXT: paddq 16(%rdi), %xmm1 -; SSE-NEXT: paddq 32(%rdi), %xmm2 -; SSE-NEXT: paddq 48(%rdi), %xmm3 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v8i64: +; SSE2: # BB#0: +; SSE2-NEXT: paddq (%rdi), %xmm0 +; SSE2-NEXT: paddq 16(%rdi), %xmm1 +; SSE2-NEXT: paddq 32(%rdi), %xmm2 +; SSE2-NEXT: paddq 48(%rdi), %xmm3 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v8i64: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa 48(%rdi), %xmm4 +; SSE41-NEXT: movntdqa 32(%rdi), %xmm5 +; SSE41-NEXT: movntdqa 16(%rdi), %xmm6 +; SSE41-NEXT: movntdqa (%rdi), %xmm7 +; SSE41-NEXT: paddq %xmm7, %xmm0 +; SSE41-NEXT: paddq %xmm6, %xmm1 +; SSE41-NEXT: paddq %xmm5, %xmm2 +; SSE41-NEXT: paddq %xmm4, %xmm3 +; SSE41-NEXT: retq ; ; AVX1-LABEL: test_arg_v8i64: ; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa (%rdi), %ymm2 -; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 -; AVX1-NEXT: vpaddq %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vpaddq %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm4 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 +; AVX1-NEXT: vpaddq %xmm5, %xmm6, %xmm5 +; AVX1-NEXT: vpaddq %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpaddq %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_arg_v8i64: ; AVX2: # BB#0: -; AVX2-NEXT: vpaddq (%rdi), %ymm0, %ymm0 -; AVX2-NEXT: vpaddq 32(%rdi), %ymm1, %ymm1 +; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2 +; AVX2-NEXT: vmovntdqa (%rdi), %ymm3 +; AVX2-NEXT: vpaddq %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_arg_v8i64: ; AVX512: # BB#0: -; AVX512-NEXT: vpaddq (%rdi), %zmm0, %zmm0 +; AVX512-NEXT: vmovntdqa (%rdi), %zmm1 +; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: retq %1 = load <8 x i64>, <8 x i64>* %src, align 64, !nontemporal !1 %2 = add <8 x i64> %arg, %1 @@ -1019,51 +1274,70 @@ define <8 x i64> @test_arg_v8i64(<8 x i64> %arg, <8 x i64>* %src) { } define <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) { -; SSE-LABEL: test_arg_v32i16: -; SSE: # BB#0: -; SSE-NEXT: paddw (%rdi), %xmm0 -; SSE-NEXT: paddw 16(%rdi), %xmm1 -; SSE-NEXT: paddw 32(%rdi), %xmm2 -; SSE-NEXT: paddw 48(%rdi), %xmm3 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v32i16: +; SSE2: # BB#0: +; SSE2-NEXT: paddw (%rdi), %xmm0 +; SSE2-NEXT: paddw 16(%rdi), %xmm1 +; SSE2-NEXT: paddw 32(%rdi), %xmm2 +; SSE2-NEXT: paddw 48(%rdi), %xmm3 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v32i16: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa 48(%rdi), %xmm4 +; SSE41-NEXT: movntdqa 32(%rdi), %xmm5 +; SSE41-NEXT: movntdqa 16(%rdi), %xmm6 +; SSE41-NEXT: movntdqa (%rdi), %xmm7 +; SSE41-NEXT: paddw %xmm7, %xmm0 +; SSE41-NEXT: paddw %xmm6, %xmm1 +; SSE41-NEXT: paddw %xmm5, %xmm2 +; SSE41-NEXT: paddw %xmm4, %xmm3 +; SSE41-NEXT: retq ; ; AVX1-LABEL: test_arg_v32i16: ; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa (%rdi), %ymm2 -; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 -; AVX1-NEXT: vpaddw %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vpaddw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpaddw %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm4 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 +; AVX1-NEXT: vpaddw %xmm5, %xmm6, %xmm5 +; AVX1-NEXT: vpaddw %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vpaddw %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpaddw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_arg_v32i16: ; AVX2: # BB#0: -; AVX2-NEXT: vpaddw (%rdi), %ymm0, %ymm0 -; AVX2-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1 +; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2 +; AVX2-NEXT: vmovntdqa (%rdi), %ymm3 +; AVX2-NEXT: vpaddw %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: test_arg_v32i16: ; AVX512F: # BB#0: -; AVX512F-NEXT: vpaddw (%rdi), %ymm0, %ymm0 -; AVX512F-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1 +; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm2 +; AVX512F-NEXT: vmovntdqa (%rdi), %ymm3 +; AVX512F-NEXT: vpaddw %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: test_arg_v32i16: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vpaddw (%rdi), %zmm0, %zmm0 +; AVX512BW-NEXT: vmovntdqa (%rdi), %zmm1 +; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VL-LABEL: test_arg_v32i16: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 -; AVX512VL-NEXT: vpaddw 32(%rdi), %ymm1, %ymm1 +; AVX512VL-NEXT: vmovntdqa 32(%rdi), %ymm2 +; AVX512VL-NEXT: vmovntdqa (%rdi), %ymm3 +; AVX512VL-NEXT: vpaddw %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: retq %1 = load <32 x i16>, <32 x i16>* %src, align 64, !nontemporal !1 %2 = add <32 x i16> %arg, %1 @@ -1071,51 +1345,70 @@ define <32 x i16> @test_arg_v32i16(<32 x i16> %arg, <32 x i16>* %src) { } define <64 x i8> @test_arg_v64i8(<64 x i8> %arg, <64 x i8>* %src) { -; SSE-LABEL: test_arg_v64i8: -; SSE: # BB#0: -; SSE-NEXT: paddb (%rdi), %xmm0 -; SSE-NEXT: paddb 16(%rdi), %xmm1 -; SSE-NEXT: paddb 32(%rdi), %xmm2 -; SSE-NEXT: paddb 48(%rdi), %xmm3 -; SSE-NEXT: retq +; SSE2-LABEL: test_arg_v64i8: +; SSE2: # BB#0: +; SSE2-NEXT: paddb (%rdi), %xmm0 +; SSE2-NEXT: paddb 16(%rdi), %xmm1 +; SSE2-NEXT: paddb 32(%rdi), %xmm2 +; SSE2-NEXT: paddb 48(%rdi), %xmm3 +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_arg_v64i8: +; SSE41: # BB#0: +; SSE41-NEXT: movntdqa 48(%rdi), %xmm4 +; SSE41-NEXT: movntdqa 32(%rdi), %xmm5 +; SSE41-NEXT: movntdqa 16(%rdi), %xmm6 +; SSE41-NEXT: movntdqa (%rdi), %xmm7 +; SSE41-NEXT: paddb %xmm7, %xmm0 +; SSE41-NEXT: paddb %xmm6, %xmm1 +; SSE41-NEXT: paddb %xmm5, %xmm2 +; SSE41-NEXT: paddb %xmm4, %xmm3 +; SSE41-NEXT: retq ; ; AVX1-LABEL: test_arg_v64i8: ; AVX1: # BB#0: -; AVX1-NEXT: vmovdqa (%rdi), %ymm2 -; AVX1-NEXT: vmovdqa 32(%rdi), %ymm3 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 -; AVX1-NEXT: vpaddb %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm3 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm4 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 +; AVX1-NEXT: vpaddb %xmm5, %xmm6, %xmm5 +; AVX1-NEXT: vpaddb %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vpaddb %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_arg_v64i8: ; AVX2: # BB#0: -; AVX2-NEXT: vpaddb (%rdi), %ymm0, %ymm0 -; AVX2-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1 +; AVX2-NEXT: vmovntdqa 32(%rdi), %ymm2 +; AVX2-NEXT: vmovntdqa (%rdi), %ymm3 +; AVX2-NEXT: vpaddb %ymm3, %ymm0, %ymm0 +; AVX2-NEXT: vpaddb %ymm2, %ymm1, %ymm1 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: test_arg_v64i8: ; AVX512F: # BB#0: -; AVX512F-NEXT: vpaddb (%rdi), %ymm0, %ymm0 -; AVX512F-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1 +; AVX512F-NEXT: vmovntdqa 32(%rdi), %ymm2 +; AVX512F-NEXT: vmovntdqa (%rdi), %ymm3 +; AVX512F-NEXT: vpaddb %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddb %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: test_arg_v64i8: ; AVX512BW: # BB#0: -; AVX512BW-NEXT: vpaddb (%rdi), %zmm0, %zmm0 +; AVX512BW-NEXT: vmovntdqa (%rdi), %zmm1 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VL-LABEL: test_arg_v64i8: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 -; AVX512VL-NEXT: vpaddb 32(%rdi), %ymm1, %ymm1 +; AVX512VL-NEXT: vmovntdqa 32(%rdi), %ymm2 +; AVX512VL-NEXT: vmovntdqa (%rdi), %ymm3 +; AVX512VL-NEXT: vpaddb %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddb %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: retq %1 = load <64 x i8>, <64 x i8>* %src, align 64, !nontemporal !1 %2 = add <64 x i8> %arg, %1 diff --git a/test/CodeGen/X86/pr32659.ll b/test/CodeGen/X86/pr32659.ll new file mode 100644 index 000000000000..aafae9c4f6c9 --- /dev/null +++ b/test/CodeGen/X86/pr32659.ll @@ -0,0 +1,83 @@ +; RUN: llc -o - %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +@a = external global i32, align 4 +@d = external global i32*, align 4 +@k = external global i32**, align 4 +@j = external global i32***, align 4 +@h = external global i32, align 4 +@c = external global i32, align 4 +@i = external global i32, align 4 +@b = external global i32, align 4 +@f = external global i64, align 8 +@e = external global i64, align 8 +@g = external global i32, align 4 + +; Function Attrs: norecurse nounwind optsize readnone +declare i32 @fn1(i32 returned) #0 + + +; CHECK-LABEL: fn2 +; CHECK: calll putchar +; CHECK: addl $1, +; CHECK: adcl $0, +; Function Attrs: nounwind optsize +define void @fn2() #1 { +entry: + %putchar = tail call i32 @putchar(i32 48) + %0 = load volatile i32, i32* @h, align 4 + %1 = load i32, i32* @c, align 4, !tbaa !2 + %2 = load i32***, i32**** @j, align 4 + %3 = load i32**, i32*** %2, align 4 + %4 = load i32*, i32** %3, align 4 + %5 = load i32, i32* %4, align 4 + %cmp = icmp sgt i32 %1, %5 + %conv = zext i1 %cmp to i32 + %6 = load i32, i32* @i, align 4 + %cmp1 = icmp sgt i32 %6, %conv + %conv2 = zext i1 %cmp1 to i32 + store i32 %conv2, i32* @b, align 4 + %cmp3 = icmp sgt i32 %0, %conv2 + %conv4 = zext i1 %cmp3 to i32 + %7 = load i32, i32* @a, align 4 + %or = xor i32 %7, %conv4 + store i32 %or, i32* @a, align 4 + %8 = load i32*, i32** @d, align 4 + %9 = load i32, i32* %8, align 4 + %conv6 = sext i32 %9 to i64 + %10 = load i64, i64* @e, align 8 + %and = and i64 %10, %conv6 + store i64 %and, i64* @e, align 8 + %11 = load i32, i32* @g, align 4 + %dec = add nsw i32 %11, -1 + store i32 %dec, i32* @g, align 4 + %12 = load i64, i64* @f, align 8 + %inc = add nsw i64 %12, 1 + store i64 %inc, i64* @f, align 8 + ret void +} + +; Function Attrs: nounwind optsize +declare i32 @main() #1 + +; Function Attrs: nounwind +declare i32 @putchar(i32) #2 + +attributes #0 = { optsize readnone } +attributes #1 = { optsize } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"NumRegisterParameters", i32 0} +!1 = !{!"clang version 5.0.0 (trunk 300074) (llvm/trunk 300078)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7, !7, i64 0} +!7 = !{!"any pointer", !4, i64 0} +!8 = !{!9, !9, i64 0} +!9 = !{!"long long", !4, i64 0} diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll index 7c2937936313..0e8db74fe1bd 100644 --- a/test/CodeGen/X86/select.ll +++ b/test/CodeGen/X86/select.ll @@ -314,13 +314,13 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) ; GENERIC-NEXT: jmp LBB7_6 ; GENERIC-NEXT: LBB7_4: ; GENERIC-NEXT: movd %r9d, %xmm1 -; GENERIC-NEXT: movd %ecx, %xmm2 +; GENERIC-NEXT: movd %r8d, %xmm2 ; GENERIC-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; GENERIC-NEXT: movd %r8d, %xmm3 +; GENERIC-NEXT: movd %ecx, %xmm3 ; GENERIC-NEXT: movd %edx, %xmm1 ; GENERIC-NEXT: LBB7_6: ; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; GENERIC-NEXT: psubd {{.*}}(%rip), %xmm1 ; GENERIC-NEXT: psubd {{.*}}(%rip), %xmm0 ; GENERIC-NEXT: movq %xmm0, 16(%rsi) @@ -350,16 +350,16 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) ; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; ATOM-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] ; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] -; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; ATOM-NEXT: jmp LBB7_6 ; ATOM-NEXT: LBB7_4: ; ATOM-NEXT: movd %r9d, %xmm1 -; ATOM-NEXT: movd %ecx, %xmm2 +; ATOM-NEXT: movd %r8d, %xmm2 ; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; ATOM-NEXT: movd %r8d, %xmm3 +; ATOM-NEXT: movd %ecx, %xmm3 ; ATOM-NEXT: movd %edx, %xmm1 ; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; ATOM-NEXT: LBB7_6: ; ATOM-NEXT: psubd {{.*}}(%rip), %xmm0 ; ATOM-NEXT: psubd {{.*}}(%rip), %xmm1 diff --git a/test/CodeGen/X86/selectiondag-dominator.ll b/test/CodeGen/X86/selectiondag-dominator.ll new file mode 100644 index 000000000000..f289a16f29eb --- /dev/null +++ b/test/CodeGen/X86/selectiondag-dominator.ll @@ -0,0 +1,30 @@ +; Make sure we don't crash because we have a stale dominator tree. +; PR33266 +; REQUIRES: asserts +; RUN: llc -o /dev/null -verify-dom-info %s + +target triple = "x86_64-unknown-linux-gnu" + +@global = external global [8 x [8 x [4 x i8]]], align 2 +@global.1 = external global { i8, [3 x i8] }, align 4 + +define void @patatino() local_unnamed_addr { +bb: + br label %bb1 + +bb1: + br label %bb2 + +bb2: + br i1 icmp ne (i8* getelementptr inbounds ({ i8, [3 x i8] }, { i8, [3 x i8] }* @global.1, i64 0, i32 0), i8* getelementptr inbounds ([8 x [8 x [4 x i8]]], [8 x [8 x [4 x i8]]]* @global, i64 0, i64 6, i64 6, i64 2)), label %bb4, label %bb3 + +bb3: + br i1 icmp eq (i64 ashr (i64 shl (i64 zext (i32 srem (i32 7, i32 zext (i1 icmp eq (i8* getelementptr inbounds ({ i8, [3 x i8] }, { i8, [3 x i8] }* @global.1, i64 0, i32 0), i8* getelementptr inbounds ([8 x [8 x [4 x i8]]], [8 x [8 x [4 x i8]]]* @global, i64 0, i64 6, i64 6, i64 2)) to i32)) to i64), i64 56), i64 56), i64 0), label %bb5, label %bb4 + +bb4: + %tmp = phi i64 [ ashr (i64 shl (i64 zext (i32 srem (i32 7, i32 zext (i1 icmp eq (i8* getelementptr inbounds ({ i8, [3 x i8] }, { i8, [3 x i8] }* @global.1, i64 0, i32 0), i8* getelementptr inbounds ([8 x [8 x [4 x i8]]], [8 x [8 x [4 x i8]]]* @global, i64 0, i64 6, i64 6, i64 2)) to i32)) to i64), i64 56), i64 56), %bb3 ], [ 7, %bb2 ] + ret void + +bb5: + ret void +} diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll index 0b03dffe99b5..e468c69db5dd 100644 --- a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -53,17 +53,17 @@ define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: andl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X32-NEXT: andl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: leal -4(%ebp), %esp ; X32-NEXT: popl %esi ; X32-NEXT: popl %ebp @@ -86,18 +86,18 @@ define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind { ; X64-NEXT: shrq $32, %rsi ; X64-NEXT: shrq $32, %rdi ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) -; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ; X64-NEXT: andl %r8d, %edi ; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ; X64-NEXT: andl %eax, %esi ; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %arg0 = bitcast <4 x float> %a0 to <4 x i32> %arg1 = bitcast <4 x float> %a1 to <4 x i32> @@ -121,15 +121,15 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) ; X32-NEXT: notl %edx -; X32-NEXT: notl %ecx ; X32-NEXT: notl %esi +; X32-NEXT: notl %ecx ; X32-NEXT: notl %eax ; X32-NEXT: andl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %eax, (%esp) -; X32-NEXT: andl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) ; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: andl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) ; X32-NEXT: andl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -138,7 +138,7 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: leal -4(%ebp), %esp ; X32-NEXT: popl %esi ; X32-NEXT: popl %ebp @@ -165,18 +165,18 @@ define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind ; X64-NEXT: notl %esi ; X64-NEXT: notl %edx ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ; X64-NEXT: andl %r8d, %edx ; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ; X64-NEXT: andl %edi, %esi ; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %arg0 = bitcast <4 x float> %a0 to <4 x i32> %arg1 = bitcast <4 x float> %a1 to <4 x i32> @@ -1277,17 +1277,17 @@ define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: orl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X32-NEXT: orl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: orl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: leal -4(%ebp), %esp ; X32-NEXT: popl %esi ; X32-NEXT: popl %ebp @@ -1310,18 +1310,18 @@ define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind { ; X64-NEXT: shrq $32, %rsi ; X64-NEXT: shrq $32, %rdi ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) -; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ; X64-NEXT: orl %r8d, %edi ; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ; X64-NEXT: orl %eax, %esi ; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %arg0 = bitcast <4 x float> %a0 to <4 x i32> %arg1 = bitcast <4 x float> %a1 to <4 x i32> @@ -1538,16 +1538,16 @@ define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) n ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_set_ps: ; X64: # BB#0: -; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; X64-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; X64-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; X64-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm1[0] ; X64-NEXT: movaps %xmm3, %xmm0 ; X64-NEXT: retq %res0 = insertelement <4 x float> undef, float %a3, i32 0 @@ -1677,16 +1677,16 @@ define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3) ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_setr_ps: ; X64: # BB#0: -; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X64-NEXT: retq %res0 = insertelement <4 x float> undef, float %a0, i32 0 %res1 = insertelement <4 x float> %res0, float %a1, i32 1 @@ -2239,17 +2239,17 @@ define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: xorl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: leal -4(%ebp), %esp ; X32-NEXT: popl %esi ; X32-NEXT: popl %ebp @@ -2272,18 +2272,18 @@ define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind { ; X64-NEXT: shrq $32, %rsi ; X64-NEXT: shrq $32, %rdi ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) -; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ; X64-NEXT: xorl %r8d, %edi ; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ; X64-NEXT: xorl %eax, %esi ; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %arg0 = bitcast <4 x float> %a0 to <4 x i32> %arg1 = bitcast <4 x float> %a1 to <4 x i32> diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll index f4964b5a6f66..c74dec3e21b6 100644 --- a/test/CodeGen/X86/sse1.ll +++ b/test/CodeGen/X86/sse1.ll @@ -87,17 +87,17 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) { ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: .LBB1_11: # %entry ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X32-NEXT: retl ; ; X64-LABEL: vselect: ; X64: # BB#0: # %entry -; X64-NEXT: testl %ecx, %ecx +; X64-NEXT: testl %edx, %edx ; X64-NEXT: xorps %xmm0, %xmm0 ; X64-NEXT: je .LBB1_1 ; X64-NEXT: # BB#2: # %entry ; X64-NEXT: xorps %xmm1, %xmm1 -; X64-NEXT: testl %edx, %edx +; X64-NEXT: testl %ecx, %ecx ; X64-NEXT: jne .LBB1_5 ; X64-NEXT: .LBB1_4: ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero @@ -111,7 +111,7 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) { ; X64-NEXT: jmp .LBB1_11 ; X64-NEXT: .LBB1_1: ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64-NEXT: testl %edx, %edx +; X64-NEXT: testl %ecx, %ecx ; X64-NEXT: je .LBB1_4 ; X64-NEXT: .LBB1_5: # %entry ; X64-NEXT: xorps %xmm2, %xmm2 @@ -126,7 +126,7 @@ define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) { ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: .LBB1_11: # %entry ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X64-NEXT: retq entry: %a1 = icmp eq <4 x i32> %q, zeroinitializer @@ -252,12 +252,12 @@ define <2 x float> @PR31672() #0 { ; X32-NEXT: movl %eax, (%esp) ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: andl %ecx, %edx -; X32-NEXT: notl %ecx -; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: orl %edx, %ecx -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X32-NEXT: andl %eax, %ecx +; X32-NEXT: notl %eax +; X32-NEXT: andl {{[0-9]+}}(%esp), %eax +; X32-NEXT: orl %ecx, %eax +; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: andl %ecx, %edx @@ -277,7 +277,7 @@ define <2 x float> @PR31672() #0 { ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: movl %ebp, %esp ; X32-NEXT: popl %ebp ; X32-NEXT: retl @@ -297,48 +297,48 @@ define <2 x float> @PR31672() #0 { ; X64-NEXT: mulps %xmm1, %xmm0 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r9 -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi -; X64-NEXT: movl %r9d, %esi -; X64-NEXT: andl %edi, %esi +; X64-NEXT: movl %esi, %eax +; X64-NEXT: andl %edi, %eax ; X64-NEXT: movl %edi, %ecx ; X64-NEXT: notl %ecx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r10 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx -; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; X64-NEXT: andl %eax, %ecx -; X64-NEXT: orl %esi, %ecx +; X64-NEXT: andl %edx, %ecx +; X64-NEXT: orl %eax, %ecx ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) -; X64-NEXT: movl %r8d, %ecx -; X64-NEXT: andl %r10d, %ecx -; X64-NEXT: movl %r10d, %esi -; X64-NEXT: notl %esi -; X64-NEXT: andl %edx, %esi -; X64-NEXT: orl %ecx, %esi -; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp) -; X64-NEXT: shrq $32, %r9 +; X64-NEXT: shrq $32, %rsi ; X64-NEXT: shrq $32, %rdi -; X64-NEXT: andl %edi, %r9d +; X64-NEXT: andl %edi, %esi ; X64-NEXT: notl %edi -; X64-NEXT: shrq $32, %rax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: orl %r9d, %eax -; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; X64-NEXT: shrq $32, %r8 -; X64-NEXT: shrq $32, %r10 -; X64-NEXT: andl %r10d, %r8d -; X64-NEXT: notl %r10d ; X64-NEXT: shrq $32, %rdx -; X64-NEXT: andl %r10d, %edx -; X64-NEXT: orl %r8d, %edx +; X64-NEXT: andl %edi, %edx +; X64-NEXT: orl %esi, %edx ; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl %r8d, %eax +; X64-NEXT: andl %r9d, %eax +; X64-NEXT: movl %r9d, %ecx +; X64-NEXT: notl %ecx +; X64-NEXT: andl %r10d, %ecx +; X64-NEXT: orl %eax, %ecx +; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) +; X64-NEXT: shrq $32, %r8 +; X64-NEXT: shrq $32, %r9 +; X64-NEXT: andl %r9d, %r8d +; X64-NEXT: notl %r9d +; X64-NEXT: shrq $32, %r10 +; X64-NEXT: andl %r9d, %r10d +; X64-NEXT: orl %r8d, %r10d +; X64-NEXT: movl %r10d, -{{[0-9]+}}(%rsp) ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %t0 = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> <float 42.0, float 3.0>) ret <2 x float> %t0 diff --git a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index 20387ccd6b7a..ff5d624e6042 100644 --- a/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -2076,7 +2076,7 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm2 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm0 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax @@ -2087,8 +2087,8 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm1 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm0 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax @@ -2099,7 +2099,7 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm3 ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; X32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm0 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax @@ -2110,27 +2110,27 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm0 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_set_epi8: ; X64: # BB#0: ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: movd %eax, %xmm0 -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: movzbl %sil, %eax ; X64-NEXT: movd %eax, %xmm1 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X64-NEXT: movzbl %r8b, %eax +; X64-NEXT: movzbl %dl, %eax ; X64-NEXT: movd %eax, %xmm0 -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: movzbl %cl, %eax ; X64-NEXT: movd %eax, %xmm2 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; X64-NEXT: movzbl %dl, %eax +; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X64-NEXT: movzbl %r8b, %eax ; X64-NEXT: movd %eax, %xmm0 -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: movzbl %r9b, %eax ; X64-NEXT: movd %eax, %xmm3 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax @@ -2138,20 +2138,20 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm1 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm0 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm2 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64-NEXT: movzbl %r9b, %eax +; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm0 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm3 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm0 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm2 @@ -2161,9 +2161,9 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm0 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1 @@ -2206,11 +2206,11 @@ define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, ; X32-NEXT: movd %eax, %xmm0 ; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] +; X32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] ; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] -; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3] -; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] +; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_set_epi16: @@ -2218,20 +2218,20 @@ define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, ; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w ; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax ; X64-NEXT: movd %edi, %xmm0 -; X64-NEXT: movd %r8d, %xmm1 +; X64-NEXT: movd %esi, %xmm1 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; X64-NEXT: movd %edx, %xmm0 -; X64-NEXT: movd %eax, %xmm2 +; X64-NEXT: movd %ecx, %xmm2 ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; X64-NEXT: movd %esi, %xmm0 +; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X64-NEXT: movd %r8d, %xmm0 ; X64-NEXT: movd %r9d, %xmm1 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X64-NEXT: movd %ecx, %xmm3 +; X64-NEXT: movd %eax, %xmm3 ; X64-NEXT: movd %r10d, %xmm0 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X64-NEXT: retq %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1 @@ -2254,18 +2254,18 @@ define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_set_epi32: ; X64: # BB#0: ; X64-NEXT: movd %edi, %xmm0 -; X64-NEXT: movd %edx, %xmm1 +; X64-NEXT: movd %esi, %xmm1 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X64-NEXT: movd %esi, %xmm2 +; X64-NEXT: movd %edx, %xmm2 ; X64-NEXT: movd %ecx, %xmm0 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1 @@ -2282,11 +2282,11 @@ define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind { ; X32: # BB#0: ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_set_epi64x: @@ -2441,10 +2441,9 @@ define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind { ; X32-LABEL: test_mm_set1_epi64x: ; X32: # BB#0: ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_set1_epi64x: @@ -2486,7 +2485,7 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 % ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm2 ; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm0 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax @@ -2497,8 +2496,8 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 % ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm1 ; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; X32-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm0 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax @@ -2509,7 +2508,7 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 % ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm3 ; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] +; X32-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm0 ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax @@ -2520,9 +2519,9 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 % ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movd %eax, %xmm0 ; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; X32-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_setr_epi8: @@ -2534,46 +2533,46 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 % ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm0 -; X64-NEXT: movzbl %cl, %eax +; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm2 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm0 -; X64-NEXT: movzbl %r9b, %eax +; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm3 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm0 -; X64-NEXT: movzbl %sil, %eax +; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm1 ; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm0 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; X64-NEXT: movd %eax, %xmm2 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: movzbl %r9b, %eax ; X64-NEXT: movd %eax, %xmm0 -; X64-NEXT: movzbl %dl, %eax +; X64-NEXT: movzbl %r8b, %eax ; X64-NEXT: movd %eax, %xmm3 ; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X64-NEXT: movzbl %cl, %eax ; X64-NEXT: movd %eax, %xmm0 -; X64-NEXT: movzbl %r8b, %eax +; X64-NEXT: movzbl %dl, %eax ; X64-NEXT: movd %eax, %xmm2 ; X64-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: movzbl %sil, %eax ; X64-NEXT: movd %eax, %xmm4 ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: movd %eax, %xmm0 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1 @@ -2616,11 +2615,11 @@ define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4 ; X32-NEXT: movd %eax, %xmm0 ; X32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; X32-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] +; X32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] ; X32-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] ; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] -; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3] -; X32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] +; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_setr_epi16: @@ -2628,20 +2627,20 @@ define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4 ; X64-NEXT: movw {{[0-9]+}}(%rsp), %ax ; X64-NEXT: movw {{[0-9]+}}(%rsp), %r10w ; X64-NEXT: movd %eax, %xmm0 -; X64-NEXT: movd %ecx, %xmm1 +; X64-NEXT: movd %r10d, %xmm1 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; X64-NEXT: movd %r9d, %xmm0 -; X64-NEXT: movd %esi, %xmm2 +; X64-NEXT: movd %r8d, %xmm2 ; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; X64-NEXT: movd %r10d, %xmm0 +; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X64-NEXT: movd %ecx, %xmm0 ; X64-NEXT: movd %edx, %xmm1 ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X64-NEXT: movd %r8d, %xmm3 +; X64-NEXT: movd %esi, %xmm3 ; X64-NEXT: movd %edi, %xmm0 ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X64-NEXT: retq %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1 @@ -2664,18 +2663,18 @@ define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwin ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_setr_epi32: ; X64: # BB#0: ; X64-NEXT: movd %ecx, %xmm0 -; X64-NEXT: movd %esi, %xmm1 +; X64-NEXT: movd %edx, %xmm1 ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X64-NEXT: movd %edx, %xmm2 +; X64-NEXT: movd %esi, %xmm2 ; X64-NEXT: movd %edi, %xmm0 ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1 @@ -2692,11 +2691,11 @@ define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind { ; X32: # BB#0: ; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; X32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_setr_epi64x: diff --git a/test/CodeGen/X86/sse3-avx-addsub-2.ll b/test/CodeGen/X86/sse3-avx-addsub-2.ll index 4d895ea264c5..b5aa26f532ef 100644 --- a/test/CodeGen/X86/sse3-avx-addsub-2.ll +++ b/test/CodeGen/X86/sse3-avx-addsub-2.ll @@ -342,9 +342,8 @@ define <4 x float> @test14(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: subss %xmm1, %xmm0 -; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1,1,3] -; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; SSE-NEXT: movapd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test14: @@ -375,8 +374,7 @@ define <4 x float> @test15(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE-NEXT: addss %xmm0, %xmm1 -; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0,2,1] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[0,0] ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq ; @@ -417,10 +415,10 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE-NEXT: addss %xmm0, %xmm1 -; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] -; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSE-NEXT: movapd %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test16: diff --git a/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll index 19305d0dad62..383ab21bd404 100644 --- a/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll @@ -354,8 +354,9 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_mm_crc32_u8(i32 %a0, i8 %a1) { ; X32-LABEL: test_mm_crc32_u8: ; X32: # BB#0: +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: crc32b {{[0-9]+}}(%esp), %eax +; X32-NEXT: crc32b %cl, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_crc32_u8: @@ -371,8 +372,9 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone define i32 @test_mm_crc32_u16(i32 %a0, i16 %a1) { ; X32-LABEL: test_mm_crc32_u16: ; X32: # BB#0: +; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: crc32w {{[0-9]+}}(%esp), %eax +; X32-NEXT: crc32w %cx, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_crc32_u16: diff --git a/test/CodeGen/X86/stack-folding-fp-avx1.ll b/test/CodeGen/X86/stack-folding-fp-avx1.ll index f937d484ce0d..4165aea8794f 100644 --- a/test/CodeGen/X86/stack-folding-fp-avx1.ll +++ b/test/CodeGen/X86/stack-folding-fp-avx1.ll @@ -1651,9 +1651,26 @@ define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) { } declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone -; TODO stack_fold_sqrtsd +define double @stack_fold_sqrtsd(double %a0) { + ;CHECK-LABEL: stack_fold_sqrtsd + ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call double @llvm.sqrt.f64(double %a0) + ret double %2 +} +declare double @llvm.sqrt.f64(double) nounwind readnone + ; TODO stack_fold_sqrtsd_int -; TODO stack_fold_sqrtss + +define float @stack_fold_sqrtss(float %a0) { + ;CHECK-LABEL: stack_fold_sqrtss + ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call float @llvm.sqrt.f32(float %a0) + ret float %2 +} +declare float @llvm.sqrt.f32(float) nounwind readnone + ; TODO stack_fold_sqrtss_int define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) { diff --git a/test/CodeGen/X86/stack-folding-int-sse42.ll b/test/CodeGen/X86/stack-folding-int-sse42.ll index 5c6f697610a0..3ca94b7b9467 100644 --- a/test/CodeGen/X86/stack-folding-int-sse42.ll +++ b/test/CodeGen/X86/stack-folding-int-sse42.ll @@ -453,6 +453,21 @@ declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwin ; TODO stack_fold_pextrb +; We can't naively fold pextrw as it only writes to a 16-bit memory location +; even though it can store to a 32-bit register. +define i16 @stack_fold_pextrw(<8 x i16> %a0) { +; CHECK-LABEL: stack_fold_pextrw +; CHECK: pextrw $1, {{%xmm[0-9][0-9]*}}, %[[GPR32:(e[a-z]+|r[0-9]+d)]] +; CHECK: movl %[[GPR32]], {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Spill +; CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload +entry: +; add forces execution domain + %add = add <8 x i16> %a0, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8> + %extract = extractelement <8 x i16> %add, i32 1 + %asm = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() + ret i16 %extract +} + define i32 @stack_fold_pextrd(<4 x i32> %a0) { ;CHECK-LABEL: stack_fold_pextrd ;CHECK: pextrd $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill @@ -473,8 +488,6 @@ define i64 @stack_fold_pextrq(<2 x i64> %a0) { ret i64 %1 } -; TODO stack_fold_pextrw - define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) { ;CHECK-LABEL: stack_fold_phaddd ;CHECK: phaddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll index 3c99928824bc..8e253f11e93e 100644 --- a/test/CodeGen/X86/trunc-to-bool.ll +++ b/test/CodeGen/X86/trunc-to-bool.ll @@ -1,16 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; An integer truncation to i1 should be done with an and instruction to make ; sure only the LSBit survives. Test that this is the case both for a returned ; value and as the operand of a branch. ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s define zeroext i1 @test1(i32 %X) nounwind { +; CHECK-LABEL: test1: +; CHECK: # BB#0: +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: retl %Y = trunc i32 %X to i1 ret i1 %Y } -; CHECK-LABEL: test1: -; CHECK: andb $1, %al define i1 @test2(i32 %val, i32 %mask) nounwind { +; CHECK-LABEL: test2: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: btl %ecx, %eax +; CHECK-NEXT: jae .LBB1_2 +; CHECK-NEXT: # BB#1: # %ret_true +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB1_2: # %ret_false +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retl entry: %shifted = ashr i32 %val, %mask %anded = and i32 %shifted, 1 @@ -21,10 +37,19 @@ ret_true: ret_false: ret i1 false } -; CHECK-LABEL: test2: -; CHECK: btl define i32 @test3(i8* %ptr) nounwind { +; CHECK-LABEL: test3: +; CHECK: # BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: testb $1, (%eax) +; CHECK-NEXT: je .LBB2_2 +; CHECK-NEXT: # BB#1: # %cond_true +; CHECK-NEXT: movl $21, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB2_2: # %cond_false +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: retl %val = load i8, i8* %ptr %tmp = trunc i8 %val to i1 br i1 %tmp, label %cond_true, label %cond_false @@ -33,10 +58,18 @@ cond_true: cond_false: ret i32 42 } -; CHECK-LABEL: test3: -; CHECK: testb $1, (%eax) define i32 @test4(i8* %ptr) nounwind { +; CHECK-LABEL: test4: +; CHECK: # BB#0: +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB3_2 +; CHECK-NEXT: # BB#1: # %cond_true +; CHECK-NEXT: movl $21, %eax +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB3_2: # %cond_false +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: retl %tmp = ptrtoint i8* %ptr to i1 br i1 %tmp, label %cond_true, label %cond_false cond_true: @@ -44,10 +77,29 @@ cond_true: cond_false: ret i32 42 } -; CHECK-LABEL: test4: -; CHECK: testb $1, 4(%esp) define i32 @test5(double %d) nounwind { +; CHECK-LABEL: test5: +; CHECK: # BB#0: +; CHECK-NEXT: pushl %eax +; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: fnstcw (%esp) +; CHECK-NEXT: movzwl (%esp), %eax +; CHECK-NEXT: movw $3199, (%esp) # imm = 0xC7F +; CHECK-NEXT: fldcw (%esp) +; CHECK-NEXT: movw %ax, (%esp) +; CHECK-NEXT: fistps {{[0-9]+}}(%esp) +; CHECK-NEXT: fldcw (%esp) +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB4_2 +; CHECK-NEXT: # BB#1: # %cond_true +; CHECK-NEXT: movl $21, %eax +; CHECK-NEXT: popl %ecx +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB4_2: # %cond_false +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: popl %ecx +; CHECK-NEXT: retl %tmp = fptosi double %d to i1 br i1 %tmp, label %cond_true, label %cond_false cond_true: @@ -55,5 +107,3 @@ cond_true: cond_false: ret i32 42 } -; CHECK-LABEL: test5: -; CHECK: testb $1 diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll index 477150016486..6cfe41ac503d 100644 --- a/test/CodeGen/X86/vec_fp_to_int.ll +++ b/test/CodeGen/X86/vec_fp_to_int.ll @@ -1320,17 +1320,17 @@ define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) { ; SSE-NEXT: cvttss2si %xmm1, %rax ; SSE-NEXT: movd %eax, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3] +; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: cvttss2si %xmm2, %rax ; SSE-NEXT: movd %eax, %xmm2 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE-NEXT: cvttss2si %xmm0, %rax ; SSE-NEXT: movd %eax, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE-NEXT: cvttss2si %xmm0, %rax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -1560,33 +1560,33 @@ define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) { ; SSE-NEXT: cvttss2si %xmm0, %rax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movaps %xmm2, %xmm3 -; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3] +; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] ; SSE-NEXT: cvttss2si %xmm3, %rax ; SSE-NEXT: movd %eax, %xmm3 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] ; SSE-NEXT: cvttss2si %xmm2, %rax ; SSE-NEXT: movd %eax, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3] ; SSE-NEXT: cvttss2si %xmm2, %rax ; SSE-NEXT: movd %eax, %xmm2 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; SSE-NEXT: movaps %xmm1, %xmm2 ; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE-NEXT: cvttss2si %xmm2, %rax ; SSE-NEXT: movd %eax, %xmm2 ; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3] +; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] ; SSE-NEXT: cvttss2si %xmm3, %rax ; SSE-NEXT: movd %eax, %xmm3 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] ; SSE-NEXT: cvttss2si %xmm1, %rax ; SSE-NEXT: movd %eax, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; SSE-NEXT: cvttss2si %xmm1, %rax ; SSE-NEXT: movd %eax, %xmm1 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; SSE-NEXT: movdqa %xmm2, %xmm1 ; SSE-NEXT: retq ; diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll index a42b3c96c3ae..7cb1c95cb01a 100644 --- a/test/CodeGen/X86/vec_int_to_fp.ll +++ b/test/CodeGen/X86/vec_int_to_fp.ll @@ -1169,16 +1169,16 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) { define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE-LABEL: sitofp_4i64_to_4f32_undef: ; SSE: # BB#0: -; SSE-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 -; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm0 +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -1368,21 +1368,22 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) { ; SSE-LABEL: sitofp_4i64_to_4f32: ; SSE: # BB#0: ; SSE-NEXT: movq %xmm1, %rax -; SSE-NEXT: cvtsi2ssq %rax, %xmm3 -; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 -; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 +; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE-NEXT: movq %xmm0, %rax +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: sitofp_4i64_to_4f32: @@ -1838,21 +1839,14 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE-LABEL: uitofp_4i64_to_4f32_undef: ; SSE: # BB#0: ; SSE-NEXT: movdqa %xmm0, %xmm1 -; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: xorps %xmm2, %xmm2 -; SSE-NEXT: js .LBB41_2 -; SSE-NEXT: # BB#1: -; SSE-NEXT: xorps %xmm2, %xmm2 -; SSE-NEXT: cvtsi2ssq %rax, %xmm2 -; SSE-NEXT: .LBB41_2: ; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB41_3 -; SSE-NEXT: # BB#4: +; SSE-NEXT: js .LBB41_1 +; SSE-NEXT: # BB#2: ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE-NEXT: jmp .LBB41_5 -; SSE-NEXT: .LBB41_3: +; SSE-NEXT: jmp .LBB41_3 +; SSE-NEXT: .LBB41_1: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -1860,17 +1854,16 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE-NEXT: addss %xmm0, %xmm0 -; SSE-NEXT: .LBB41_5: -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE-NEXT: .LBB41_3: ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB41_6 -; SSE-NEXT: # BB#7: +; SSE-NEXT: js .LBB41_4 +; SSE-NEXT: # BB#5: ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 -; SSE-NEXT: jmp .LBB41_8 -; SSE-NEXT: .LBB41_6: +; SSE-NEXT: jmp .LBB41_6 +; SSE-NEXT: .LBB41_4: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -1878,9 +1871,16 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE-NEXT: addss %xmm1, %xmm1 -; SSE-NEXT: .LBB41_8: -; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE-NEXT: .LBB41_6: ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: testq %rax, %rax +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: js .LBB41_8 +; SSE-NEXT: # BB#7: +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: cvtsi2ssq %rax, %xmm1 +; SSE-NEXT: .LBB41_8: +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] ; SSE-NEXT: retq ; ; VEX-LABEL: uitofp_4i64_to_4f32_undef: @@ -2149,32 +2149,32 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB47_1 ; SSE-NEXT: # BB#2: -; SSE-NEXT: cvtsi2ssq %rax, %xmm3 +; SSE-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE-NEXT: jmp .LBB47_3 ; SSE-NEXT: .LBB47_1: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: cvtsi2ssq %rax, %xmm3 -; SSE-NEXT: addss %xmm3, %xmm3 +; SSE-NEXT: cvtsi2ssq %rax, %xmm2 +; SSE-NEXT: addss %xmm2, %xmm2 ; SSE-NEXT: .LBB47_3: -; SSE-NEXT: movq %xmm0, %rax +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB47_4 ; SSE-NEXT: # BB#5: -; SSE-NEXT: cvtsi2ssq %rax, %xmm2 +; SSE-NEXT: cvtsi2ssq %rax, %xmm3 ; SSE-NEXT: jmp .LBB47_6 ; SSE-NEXT: .LBB47_4: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: cvtsi2ssq %rax, %xmm2 -; SSE-NEXT: addss %xmm2, %xmm2 +; SSE-NEXT: cvtsi2ssq %rax, %xmm3 +; SSE-NEXT: addss %xmm3, %xmm3 ; SSE-NEXT: .LBB47_6: -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; SSE-NEXT: movq %xmm1, %rax +; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB47_7 ; SSE-NEXT: # BB#8: @@ -2208,9 +2208,9 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE-NEXT: addss %xmm0, %xmm0 ; SSE-NEXT: .LBB47_12: -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: uitofp_4i64_to_4f32: @@ -3381,22 +3381,23 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; SSE-LABEL: sitofp_load_4i64_to_4f32: ; SSE: # BB#0: ; SSE-NEXT: movdqa (%rdi), %xmm1 -; SSE-NEXT: movdqa 16(%rdi), %xmm2 -; SSE-NEXT: movq %xmm2, %rax -; SSE-NEXT: cvtsi2ssq %rax, %xmm3 +; SSE-NEXT: movdqa 16(%rdi), %xmm0 +; SSE-NEXT: movq %xmm0, %rax +; SSE-NEXT: cvtsi2ssq %rax, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE-NEXT: movq %xmm0, %rax +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm0 +; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; SSE-NEXT: movq %xmm1, %rax +; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] -; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] -; SSE-NEXT: movq %xmm2, %rax -; SSE-NEXT: xorps %xmm2, %xmm2 -; SSE-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 -; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE-NEXT: retq ; ; AVX1-LABEL: sitofp_load_4i64_to_4f32: @@ -3546,41 +3547,42 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; SSE-LABEL: sitofp_load_8i64_to_8f32: ; SSE: # BB#0: ; SSE-NEXT: movdqa (%rdi), %xmm1 -; SSE-NEXT: movdqa 16(%rdi), %xmm2 -; SSE-NEXT: movdqa 32(%rdi), %xmm3 -; SSE-NEXT: movdqa 48(%rdi), %xmm4 -; SSE-NEXT: movq %xmm2, %rax -; SSE-NEXT: cvtsi2ssq %rax, %xmm5 +; SSE-NEXT: movdqa 16(%rdi), %xmm0 +; SSE-NEXT: movdqa 32(%rdi), %xmm2 +; SSE-NEXT: movdqa 48(%rdi), %xmm3 +; SSE-NEXT: movq %xmm0, %rax +; SSE-NEXT: cvtsi2ssq %rax, %xmm4 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE-NEXT: movq %xmm0, %rax +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm0 +; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1] ; SSE-NEXT: movq %xmm1, %rax +; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1] -; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] -; SSE-NEXT: movq %xmm2, %rax -; SSE-NEXT: xorps %xmm2, %xmm2 -; SSE-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 -; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE-NEXT: movq %xmm4, %rax -; SSE-NEXT: xorps %xmm2, %xmm2 -; SSE-NEXT: cvtsi2ssq %rax, %xmm2 +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; SSE-NEXT: movq %xmm3, %rax +; SSE-NEXT: xorps %xmm4, %xmm4 +; SSE-NEXT: cvtsi2ssq %rax, %xmm4 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1] +; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 -; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1] +; SSE-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] +; SSE-NEXT: movq %xmm2, %rax +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: cvtsi2ssq %rax, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] ; SSE-NEXT: movq %xmm2, %rax ; SSE-NEXT: xorps %xmm2, %xmm2 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 -; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] -; SSE-NEXT: movq %xmm3, %rax -; SSE-NEXT: xorps %xmm3, %xmm3 -; SSE-NEXT: cvtsi2ssq %rax, %xmm3 -; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0] ; SSE-NEXT: retq ; ; AVX1-LABEL: sitofp_load_8i64_to_8f32: @@ -3822,73 +3824,73 @@ define <8 x float> @sitofp_load_8i8_to_8f32(<8 x i8> *%a) { define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; SSE-LABEL: uitofp_load_4i64_to_4f32: ; SSE: # BB#0: -; SSE-NEXT: movdqa (%rdi), %xmm1 -; SSE-NEXT: movdqa 16(%rdi), %xmm3 -; SSE-NEXT: movq %xmm3, %rax +; SSE-NEXT: movdqa (%rdi), %xmm2 +; SSE-NEXT: movdqa 16(%rdi), %xmm0 +; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB76_1 ; SSE-NEXT: # BB#2: -; SSE-NEXT: cvtsi2ssq %rax, %xmm2 +; SSE-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE-NEXT: jmp .LBB76_3 ; SSE-NEXT: .LBB76_1: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: cvtsi2ssq %rax, %xmm2 -; SSE-NEXT: addss %xmm2, %xmm2 +; SSE-NEXT: cvtsi2ssq %rax, %xmm1 +; SSE-NEXT: addss %xmm1, %xmm1 ; SSE-NEXT: .LBB76_3: -; SSE-NEXT: movq %xmm1, %rax +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB76_4 ; SSE-NEXT: # BB#5: -; SSE-NEXT: cvtsi2ssq %rax, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm3 ; SSE-NEXT: jmp .LBB76_6 ; SSE-NEXT: .LBB76_4: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE-NEXT: addss %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm3 +; SSE-NEXT: addss %xmm3, %xmm3 ; SSE-NEXT: .LBB76_6: -; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] -; SSE-NEXT: movq %xmm3, %rax +; SSE-NEXT: movq %xmm2, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB76_7 ; SSE-NEXT: # BB#8: -; SSE-NEXT: xorps %xmm3, %xmm3 -; SSE-NEXT: cvtsi2ssq %rax, %xmm3 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE-NEXT: jmp .LBB76_9 ; SSE-NEXT: .LBB76_7: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: xorps %xmm3, %xmm3 -; SSE-NEXT: cvtsi2ssq %rax, %xmm3 -; SSE-NEXT: addss %xmm3, %xmm3 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm0 +; SSE-NEXT: addss %xmm0, %xmm0 ; SSE-NEXT: .LBB76_9: -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; SSE-NEXT: movq %xmm1, %rax +; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] +; SSE-NEXT: movq %xmm2, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB76_10 ; SSE-NEXT: # BB#11: -; SSE-NEXT: xorps %xmm1, %xmm1 -; SSE-NEXT: cvtsi2ssq %rax, %xmm1 +; SSE-NEXT: xorps %xmm2, %xmm2 +; SSE-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE-NEXT: jmp .LBB76_12 ; SSE-NEXT: .LBB76_10: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: xorps %xmm1, %xmm1 -; SSE-NEXT: cvtsi2ssq %rax, %xmm1 -; SSE-NEXT: addss %xmm1, %xmm1 +; SSE-NEXT: xorps %xmm2, %xmm2 +; SSE-NEXT: cvtsi2ssq %rax, %xmm2 +; SSE-NEXT: addss %xmm2, %xmm2 ; SSE-NEXT: .LBB76_12: -; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX1-LABEL: uitofp_load_4i64_to_4f32: @@ -4186,121 +4188,121 @@ define <4 x float> @uitofp_load_4i8_to_4f32(<4 x i8> *%a) { define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; SSE-LABEL: uitofp_load_8i64_to_8f32: ; SSE: # BB#0: -; SSE-NEXT: movdqa (%rdi), %xmm1 -; SSE-NEXT: movdqa 16(%rdi), %xmm5 +; SSE-NEXT: movdqa (%rdi), %xmm5 +; SSE-NEXT: movdqa 16(%rdi), %xmm0 ; SSE-NEXT: movdqa 32(%rdi), %xmm2 -; SSE-NEXT: movdqa 48(%rdi), %xmm3 -; SSE-NEXT: movq %xmm5, %rax +; SSE-NEXT: movdqa 48(%rdi), %xmm1 +; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB80_1 ; SSE-NEXT: # BB#2: -; SSE-NEXT: cvtsi2ssq %rax, %xmm4 +; SSE-NEXT: cvtsi2ssq %rax, %xmm3 ; SSE-NEXT: jmp .LBB80_3 ; SSE-NEXT: .LBB80_1: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: cvtsi2ssq %rax, %xmm4 -; SSE-NEXT: addss %xmm4, %xmm4 +; SSE-NEXT: cvtsi2ssq %rax, %xmm3 +; SSE-NEXT: addss %xmm3, %xmm3 ; SSE-NEXT: .LBB80_3: -; SSE-NEXT: movq %xmm1, %rax +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB80_4 ; SSE-NEXT: # BB#5: -; SSE-NEXT: cvtsi2ssq %rax, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm4 ; SSE-NEXT: jmp .LBB80_6 ; SSE-NEXT: .LBB80_4: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE-NEXT: addss %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm4 +; SSE-NEXT: addss %xmm4, %xmm4 ; SSE-NEXT: .LBB80_6: -; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1] ; SSE-NEXT: movq %xmm5, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB80_7 ; SSE-NEXT: # BB#8: -; SSE-NEXT: cvtsi2ssq %rax, %xmm6 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE-NEXT: jmp .LBB80_9 ; SSE-NEXT: .LBB80_7: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: cvtsi2ssq %rax, %xmm6 -; SSE-NEXT: addss %xmm6, %xmm6 +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm0 +; SSE-NEXT: addss %xmm0, %xmm0 ; SSE-NEXT: .LBB80_9: -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; SSE-NEXT: movq %xmm1, %rax +; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1] +; SSE-NEXT: movq %xmm5, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB80_10 ; SSE-NEXT: # BB#11: -; SSE-NEXT: xorps %xmm5, %xmm5 -; SSE-NEXT: cvtsi2ssq %rax, %xmm5 +; SSE-NEXT: cvtsi2ssq %rax, %xmm6 ; SSE-NEXT: jmp .LBB80_12 ; SSE-NEXT: .LBB80_10: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: xorps %xmm5, %xmm5 -; SSE-NEXT: cvtsi2ssq %rax, %xmm5 -; SSE-NEXT: addss %xmm5, %xmm5 +; SSE-NEXT: cvtsi2ssq %rax, %xmm6 +; SSE-NEXT: addss %xmm6, %xmm6 ; SSE-NEXT: .LBB80_12: -; SSE-NEXT: movq %xmm3, %rax +; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB80_13 ; SSE-NEXT: # BB#14: -; SSE-NEXT: cvtsi2ssq %rax, %xmm7 +; SSE-NEXT: xorps %xmm5, %xmm5 +; SSE-NEXT: cvtsi2ssq %rax, %xmm5 ; SSE-NEXT: jmp .LBB80_15 ; SSE-NEXT: .LBB80_13: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: cvtsi2ssq %rax, %xmm7 -; SSE-NEXT: addss %xmm7, %xmm7 +; SSE-NEXT: xorps %xmm5, %xmm5 +; SSE-NEXT: cvtsi2ssq %rax, %xmm5 +; SSE-NEXT: addss %xmm5, %xmm5 ; SSE-NEXT: .LBB80_15: -; SSE-NEXT: movq %xmm2, %rax +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB80_16 ; SSE-NEXT: # BB#17: -; SSE-NEXT: xorps %xmm1, %xmm1 -; SSE-NEXT: cvtsi2ssq %rax, %xmm1 +; SSE-NEXT: cvtsi2ssq %rax, %xmm7 ; SSE-NEXT: jmp .LBB80_18 ; SSE-NEXT: .LBB80_16: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: xorps %xmm1, %xmm1 -; SSE-NEXT: cvtsi2ssq %rax, %xmm1 -; SSE-NEXT: addss %xmm1, %xmm1 +; SSE-NEXT: cvtsi2ssq %rax, %xmm7 +; SSE-NEXT: addss %xmm7, %xmm7 ; SSE-NEXT: .LBB80_18: -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] -; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1] -; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] -; SSE-NEXT: movq %xmm3, %rax +; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] +; SSE-NEXT: movq %xmm2, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB80_19 ; SSE-NEXT: # BB#20: -; SSE-NEXT: xorps %xmm3, %xmm3 -; SSE-NEXT: cvtsi2ssq %rax, %xmm3 +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE-NEXT: jmp .LBB80_21 ; SSE-NEXT: .LBB80_19: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax -; SSE-NEXT: xorps %xmm3, %xmm3 -; SSE-NEXT: cvtsi2ssq %rax, %xmm3 -; SSE-NEXT: addss %xmm3, %xmm3 +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: cvtsi2ssq %rax, %xmm1 +; SSE-NEXT: addss %xmm1, %xmm1 ; SSE-NEXT: .LBB80_21: -; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1] -; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1] +; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1] ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] ; SSE-NEXT: movq %xmm2, %rax ; SSE-NEXT: testq %rax, %rax @@ -4318,8 +4320,8 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE-NEXT: addss %xmm2, %xmm2 ; SSE-NEXT: .LBB80_24: -; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm5[0] ; SSE-NEXT: retq ; ; AVX1-LABEL: uitofp_load_8i64_to_8f32: diff --git a/test/CodeGen/X86/vec_set.ll b/test/CodeGen/X86/vec_set.ll index 6439a6dcb00b..918430efea1d 100644 --- a/test/CodeGen/X86/vec_set.ll +++ b/test/CodeGen/X86/vec_set.ll @@ -12,35 +12,35 @@ define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i1 ; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] -; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X86-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; X86-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0] ; X86-NEXT: movdqa %xmm3, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: test: ; X64: # BB#0: -; X64-NEXT: movd %r8d, %xmm0 +; X64-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; X64-NEXT: movd %edx, %xmm1 -; X64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X64-NEXT: movd %ecx, %xmm0 +; X64-NEXT: movd %r9d, %xmm0 ; X64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; X64-NEXT: movd %r9d, %xmm2 +; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-NEXT: movd %r8d, %xmm1 +; X64-NEXT: movd %ecx, %xmm2 +; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X64-NEXT: movd %edx, %xmm1 ; X64-NEXT: movd %esi, %xmm3 -; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] ; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] ; X64-NEXT: movdqa %xmm3, (%rdi) ; X64-NEXT: retq %tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0 diff --git a/test/CodeGen/X86/vector-compare-results.ll b/test/CodeGen/X86/vector-compare-results.ll index 4fa9596192a6..ce0b067f5043 100644 --- a/test/CodeGen/X86/vector-compare-results.ll +++ b/test/CodeGen/X86/vector-compare-results.ll @@ -5345,217 +5345,213 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind { ; ; AVX1-LABEL: test_cmp_v64i16: ; AVX1: # BB#0: -; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm8 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm9 -; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8 -; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpacksswb %xmm8, %xmm0, %xmm8 -; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 +; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm8 +; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpcmpgtw %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 ; AVX1-NEXT: vpcmpgtw %xmm5, %xmm1, %xmm1 -; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vpcmpgtw %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vpcmpgtw %xmm6, %xmm2, %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm6 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm6, %xmm2, %xmm2 -; AVX1-NEXT: vpacksswb %xmm0, %xmm2, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vpcmpgtw %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vpcmpgtw %xmm7, %xmm3, %xmm6 +; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 ; AVX1-NEXT: vpcmpgtw %xmm7, %xmm3, %xmm3 -; AVX1-NEXT: vpacksswb %xmm0, %xmm3, %xmm3 -; AVX1-NEXT: vpextrb $15, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) ; AVX1-NEXT: vpextrb $14, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm3, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) ; AVX1-NEXT: vpextrb $12, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm3, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) ; AVX1-NEXT: vpextrb $10, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm3, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) ; AVX1-NEXT: vpextrb $8, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm3, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) ; AVX1-NEXT: vpextrb $6, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm3, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) ; AVX1-NEXT: vpextrb $4, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm3, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) ; AVX1-NEXT: vpextrb $2, %xmm3, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm3, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) ; AVX1-NEXT: vpextrb $0, %xmm3, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $14, %xmm6, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $14, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $12, %xmm6, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $13, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $10, %xmm6, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $12, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $8, %xmm6, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $11, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $6, %xmm6, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $10, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $4, %xmm6, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $9, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $2, %xmm6, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $8, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $0, %xmm6, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $7, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $14, %xmm2, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $6, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $12, %xmm2, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $5, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $10, %xmm2, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $4, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $8, %xmm2, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $3, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $6, %xmm2, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $2, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $4, %xmm2, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $1, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $2, %xmm2, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) ; AVX1-NEXT: vpextrb $0, %xmm2, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, 4(%rdi) +; AVX1-NEXT: vpextrb $14, %xmm5, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, 4(%rdi) +; AVX1-NEXT: vpextrb $12, %xmm5, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, 4(%rdi) +; AVX1-NEXT: vpextrb $10, %xmm5, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, 4(%rdi) +; AVX1-NEXT: vpextrb $8, %xmm5, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, 4(%rdi) +; AVX1-NEXT: vpextrb $6, %xmm5, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, 4(%rdi) +; AVX1-NEXT: vpextrb $4, %xmm5, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, 4(%rdi) +; AVX1-NEXT: vpextrb $2, %xmm5, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, 4(%rdi) +; AVX1-NEXT: vpextrb $0, %xmm5, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, 4(%rdi) -; AVX1-NEXT: vpextrb $15, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) ; AVX1-NEXT: vpextrb $14, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $13, %xmm1, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) ; AVX1-NEXT: vpextrb $12, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $11, %xmm1, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) ; AVX1-NEXT: vpextrb $10, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $9, %xmm1, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) ; AVX1-NEXT: vpextrb $8, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $7, %xmm1, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) ; AVX1-NEXT: vpextrb $6, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $5, %xmm1, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) ; AVX1-NEXT: vpextrb $4, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $3, %xmm1, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) ; AVX1-NEXT: vpextrb $2, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $1, %xmm1, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) ; AVX1-NEXT: vpextrb $0, %xmm1, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $15, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $14, %xmm4, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $14, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $12, %xmm4, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $13, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $10, %xmm4, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $12, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $8, %xmm4, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $11, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $6, %xmm4, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $10, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $4, %xmm4, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $9, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $2, %xmm4, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $8, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $0, %xmm4, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $7, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $14, %xmm0, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $6, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $12, %xmm0, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $5, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $10, %xmm0, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $4, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $8, %xmm0, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $3, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $6, %xmm0, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $2, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $4, %xmm0, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) -; AVX1-NEXT: vpextrb $1, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: vpextrb $2, %xmm0, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, (%rdi) +; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, (%rdi) +; AVX1-NEXT: vpextrb $14, %xmm8, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, (%rdi) +; AVX1-NEXT: vpextrb $12, %xmm8, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, (%rdi) +; AVX1-NEXT: vpextrb $10, %xmm8, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, (%rdi) +; AVX1-NEXT: vpextrb $8, %xmm8, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, (%rdi) +; AVX1-NEXT: vpextrb $6, %xmm8, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, (%rdi) +; AVX1-NEXT: vpextrb $4, %xmm8, %eax +; AVX1-NEXT: andl $1, %eax +; AVX1-NEXT: movb %al, (%rdi) +; AVX1-NEXT: vpextrb $2, %xmm8, %eax +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) ; AVX1-NEXT: vpextrb $0, %xmm8, %eax -; AVX1-NEXT: andb $1, %al +; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: movb %al, (%rdi) ; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: vzeroupper @@ -5565,207 +5561,203 @@ define <64 x i1> @test_cmp_v64i16(<64 x i16> %a0, <64 x i16> %a1) nounwind { ; AVX2: # BB#0: ; AVX2-NEXT: vpcmpgtw %ymm4, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 -; AVX2-NEXT: vpacksswb %xmm4, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpgtw %ymm5, %ymm1, %ymm1 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4 -; AVX2-NEXT: vpacksswb %xmm4, %xmm1, %xmm1 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm5 ; AVX2-NEXT: vpcmpgtw %ymm6, %ymm2, %ymm2 -; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4 -; AVX2-NEXT: vpacksswb %xmm4, %xmm2, %xmm2 +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm6 ; AVX2-NEXT: vpcmpgtw %ymm7, %ymm3, %ymm3 -; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 -; AVX2-NEXT: vpacksswb %xmm4, %xmm3, %xmm3 -; AVX2-NEXT: vpextrb $15, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm7 +; AVX2-NEXT: vpextrb $14, %xmm7, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $12, %xmm7, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $10, %xmm7, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $8, %xmm7, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $6, %xmm7, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $4, %xmm7, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $2, %xmm7, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $0, %xmm7, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $14, %xmm3, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $12, %xmm3, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $10, %xmm3, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $8, %xmm3, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $6, %xmm3, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $4, %xmm3, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $2, %xmm3, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) ; AVX2-NEXT: vpextrb $0, %xmm3, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $15, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $14, %xmm6, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $14, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $12, %xmm6, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $13, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $10, %xmm6, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $12, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $8, %xmm6, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $11, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $6, %xmm6, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $10, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $4, %xmm6, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $9, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $2, %xmm6, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $8, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $0, %xmm6, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $7, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $14, %xmm2, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $6, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $12, %xmm2, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $5, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $10, %xmm2, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $4, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $8, %xmm2, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $3, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $6, %xmm2, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $2, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $4, %xmm2, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $1, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $2, %xmm2, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) ; AVX2-NEXT: vpextrb $0, %xmm2, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, 4(%rdi) -; AVX2-NEXT: vpextrb $15, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $14, %xmm5, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $12, %xmm5, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $13, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $10, %xmm5, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $8, %xmm5, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $11, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $6, %xmm5, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $4, %xmm5, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $9, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $2, %xmm5, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $0, %xmm5, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $7, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $14, %xmm1, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $12, %xmm1, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $5, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $10, %xmm1, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $8, %xmm1, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $3, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $6, %xmm1, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $4, %xmm1, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $1, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $2, %xmm1, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) ; AVX2-NEXT: vpextrb $0, %xmm1, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $15, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $14, %xmm4, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $14, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $12, %xmm4, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $13, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $10, %xmm4, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $12, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $8, %xmm4, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $11, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $6, %xmm4, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $10, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $4, %xmm4, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $9, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $2, %xmm4, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $8, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $0, %xmm4, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $7, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $14, %xmm0, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $6, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $12, %xmm0, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $5, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $10, %xmm0, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $4, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $8, %xmm0, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $3, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $6, %xmm0, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $2, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $4, %xmm0, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) -; AVX2-NEXT: vpextrb $1, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: vpextrb $2, %xmm0, %eax +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) ; AVX2-NEXT: vpextrb $0, %xmm0, %eax -; AVX2-NEXT: andb $1, %al +; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: movb %al, (%rdi) ; AVX2-NEXT: movq %rdi, %rax ; AVX2-NEXT: vzeroupper diff --git a/test/CodeGen/X86/vector-rem.ll b/test/CodeGen/X86/vector-rem.ll index 340dd77ec481..3e3e93a7d5b0 100644 --- a/test/CodeGen/X86/vector-rem.ll +++ b/test/CodeGen/X86/vector-rem.ll @@ -11,9 +11,9 @@ define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) nounwind { ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl %ecx ; CHECK-NEXT: movd %edx, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3] +; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] ; CHECK-NEXT: movd %xmm3, %eax -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] +; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] ; CHECK-NEXT: movd %xmm3, %ecx ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl %ecx @@ -24,15 +24,15 @@ define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) nounwind { ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl %ecx ; CHECK-NEXT: movd %edx, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] ; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] ; CHECK-NEXT: movd %xmm0, %ecx ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl %ecx ; CHECK-NEXT: movd %edx, %xmm0 ; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; CHECK-NEXT: movdqa %xmm2, %xmm0 ; CHECK-NEXT: retq %m = srem <4 x i32> %t, %u @@ -49,9 +49,9 @@ define <4 x i32> @bar(<4 x i32> %t, <4 x i32> %u) nounwind { ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ecx ; CHECK-NEXT: movd %edx, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3] +; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] ; CHECK-NEXT: movd %xmm3, %eax -; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] +; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] ; CHECK-NEXT: movd %xmm3, %ecx ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ecx @@ -62,15 +62,15 @@ define <4 x i32> @bar(<4 x i32> %t, <4 x i32> %u) nounwind { ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ecx ; CHECK-NEXT: movd %edx, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] ; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] ; CHECK-NEXT: movd %xmm0, %ecx ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %ecx ; CHECK-NEXT: movd %edx, %xmm0 ; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; CHECK-NEXT: movdqa %xmm2, %xmm0 ; CHECK-NEXT: retq %m = urem <4 x i32> %t, %u @@ -88,9 +88,9 @@ define <4 x float> @qux(<4 x float> %t, <4 x float> %u) nounwind { ; CHECK-NEXT: callq fmodf ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] +; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; CHECK-NEXT: callq fmodf ; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] @@ -100,15 +100,15 @@ define <4 x float> @qux(<4 x float> %t, <4 x float> %u) nounwind { ; CHECK-NEXT: callq fmodf ; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload -; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] +; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; CHECK-NEXT: callq fmodf ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload ; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: unpcklps (%rsp), %xmm1 # 16-byte Folded Reload -; CHECK-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] -; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload +; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] +; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: addq $72, %rsp ; CHECK-NEXT: retq %m = frem <4 x float> %t, %u diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll index 53e471d6f175..392c0de95f24 100644 --- a/test/CodeGen/X86/vector-sext.ll +++ b/test/CodeGen/X86/vector-sext.ll @@ -1333,19 +1333,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { ; SSE2-NEXT: sarq $63, %rcx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movq %rax, %rcx -; SSE2-NEXT: shlq $62, %rcx +; SSE2-NEXT: shlq $61, %rcx ; SSE2-NEXT: sarq $63, %rcx ; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: movq %rax, %rcx -; SSE2-NEXT: shlq $61, %rcx +; SSE2-NEXT: shlq $62, %rcx ; SSE2-NEXT: sarq $63, %rcx ; SSE2-NEXT: movd %ecx, %xmm2 ; SSE2-NEXT: shlq $63, %rax ; SSE2-NEXT: sarq $63, %rax ; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: load_sext_4i1_to_4i32: @@ -1356,19 +1356,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { ; SSSE3-NEXT: sarq $63, %rcx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movq %rax, %rcx -; SSSE3-NEXT: shlq $62, %rcx +; SSSE3-NEXT: shlq $61, %rcx ; SSSE3-NEXT: sarq $63, %rcx ; SSSE3-NEXT: movd %ecx, %xmm1 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSSE3-NEXT: movq %rax, %rcx -; SSSE3-NEXT: shlq $61, %rcx +; SSSE3-NEXT: shlq $62, %rcx ; SSSE3-NEXT: sarq $63, %rcx ; SSSE3-NEXT: movd %ecx, %xmm2 ; SSSE3-NEXT: shlq $63, %rax ; SSSE3-NEXT: sarq $63, %rax ; SSSE3-NEXT: movd %eax, %xmm0 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: load_sext_4i1_to_4i32: @@ -1523,14 +1523,14 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; SSE2-NEXT: shrl $3, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl %ecx +; SSE2-NEXT: shrl $2, %ecx ; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: shrl $2, %eax +; SSE2-NEXT: shrl %eax ; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] ; SSE2-NEXT: psllq $63, %xmm0 @@ -1549,14 +1549,14 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { ; SSSE3-NEXT: shrl $3, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl %ecx +; SSSE3-NEXT: shrl $2, %ecx ; SSSE3-NEXT: movd %ecx, %xmm1 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; SSSE3-NEXT: movd %eax, %xmm2 -; SSSE3-NEXT: shrl $2, %eax +; SSSE3-NEXT: shrl %eax ; SSSE3-NEXT: movd %eax, %xmm0 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSSE3-NEXT: pand {{.*}}(%rip), %xmm2 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] ; SSSE3-NEXT: psllq $63, %xmm0 @@ -1813,7 +1813,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { ; SSE2-NEXT: shrq $7, %rcx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movq %rax, %rcx -; SSE2-NEXT: shlq $60, %rcx +; SSE2-NEXT: shlq $57, %rcx ; SSE2-NEXT: sarq $63, %rcx ; SSE2-NEXT: movd %ecx, %xmm2 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] @@ -1822,13 +1822,13 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { ; SSE2-NEXT: sarq $63, %rcx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movq %rax, %rcx -; SSE2-NEXT: shlq $62, %rcx +; SSE2-NEXT: shlq $59, %rcx ; SSE2-NEXT: sarq $63, %rcx ; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE2-NEXT: movq %rax, %rcx -; SSE2-NEXT: shlq $57, %rcx +; SSE2-NEXT: shlq $60, %rcx ; SSE2-NEXT: sarq $63, %rcx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movq %rax, %rcx @@ -1837,15 +1837,15 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { ; SSE2-NEXT: movd %ecx, %xmm2 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] ; SSE2-NEXT: movq %rax, %rcx -; SSE2-NEXT: shlq $59, %rcx +; SSE2-NEXT: shlq $62, %rcx ; SSE2-NEXT: sarq $63, %rcx ; SSE2-NEXT: movd %ecx, %xmm3 ; SSE2-NEXT: shlq $63, %rax ; SSE2-NEXT: sarq $63, %rax ; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: load_sext_8i1_to_8i16: @@ -1855,7 +1855,7 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { ; SSSE3-NEXT: shrq $7, %rcx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movq %rax, %rcx -; SSSE3-NEXT: shlq $60, %rcx +; SSSE3-NEXT: shlq $57, %rcx ; SSSE3-NEXT: sarq $63, %rcx ; SSSE3-NEXT: movd %ecx, %xmm2 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] @@ -1864,13 +1864,13 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { ; SSSE3-NEXT: sarq $63, %rcx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movq %rax, %rcx -; SSSE3-NEXT: shlq $62, %rcx +; SSSE3-NEXT: shlq $59, %rcx ; SSSE3-NEXT: sarq $63, %rcx ; SSSE3-NEXT: movd %ecx, %xmm1 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSSE3-NEXT: movq %rax, %rcx -; SSSE3-NEXT: shlq $57, %rcx +; SSSE3-NEXT: shlq $60, %rcx ; SSSE3-NEXT: sarq $63, %rcx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movq %rax, %rcx @@ -1879,15 +1879,15 @@ define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { ; SSSE3-NEXT: movd %ecx, %xmm2 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] ; SSSE3-NEXT: movq %rax, %rcx -; SSSE3-NEXT: shlq $59, %rcx +; SSSE3-NEXT: shlq $62, %rcx ; SSSE3-NEXT: sarq $63, %rcx ; SSSE3-NEXT: movd %ecx, %xmm3 ; SSSE3-NEXT: shlq $63, %rax ; SSSE3-NEXT: sarq $63, %rax ; SSSE3-NEXT: movd %eax, %xmm0 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: load_sext_8i1_to_8i16: @@ -2191,7 +2191,7 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movzbl (%rdi), %eax ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $6, %ecx +; SSE2-NEXT: shrl $3, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movl %eax, %ecx @@ -2203,30 +2203,30 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $4, %ecx +; SSE2-NEXT: shrl %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: shrl $5, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl %ecx +; SSE2-NEXT: shrl $4, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm2 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $3, %ecx +; SSE2-NEXT: shrl $6, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: shrl $7, %eax ; SSE2-NEXT: movzwl %ax, %eax ; SSE2-NEXT: movd %eax, %xmm3 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-NEXT: pslld $31, %xmm0 @@ -2240,7 +2240,7 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movzbl (%rdi), %eax ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $6, %ecx +; SSSE3-NEXT: shrl $3, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movl %eax, %ecx @@ -2252,30 +2252,30 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm1 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $4, %ecx +; SSSE3-NEXT: shrl %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSSE3-NEXT: movl %eax, %ecx ; SSSE3-NEXT: shrl $5, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl %ecx +; SSSE3-NEXT: shrl $4, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm2 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $3, %ecx +; SSSE3-NEXT: shrl $6, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: shrl $7, %eax ; SSSE3-NEXT: movzwl %ax, %eax ; SSSE3-NEXT: movd %eax, %xmm3 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSSE3-NEXT: pslld $31, %xmm0 @@ -2546,69 +2546,69 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone { ; SSE2-NEXT: movq %rax, %rsi ; SSE2-NEXT: movq %rax, %rdi ; SSE2-NEXT: movq %rax, %rbp -; SSE2-NEXT: shlq $49, %rbp -; SSE2-NEXT: sarq $63, %rbp +; SSE2-NEXT: shrq $15, %rbp ; SSE2-NEXT: movd %ebp, %xmm0 ; SSE2-NEXT: movq %rax, %rbp ; SSE2-NEXT: movsbq %al, %rax -; SSE2-NEXT: shlq $57, %r8 +; SSE2-NEXT: shlq $49, %r8 ; SSE2-NEXT: sarq $63, %r8 ; SSE2-NEXT: movd %r8d, %xmm1 -; SSE2-NEXT: shlq $53, %r9 +; SSE2-NEXT: shlq $50, %r9 ; SSE2-NEXT: sarq $63, %r9 ; SSE2-NEXT: movd %r9d, %xmm2 -; SSE2-NEXT: shlq $61, %r10 +; SSE2-NEXT: shlq $51, %r10 ; SSE2-NEXT: sarq $63, %r10 ; SSE2-NEXT: movd %r10d, %xmm3 -; SSE2-NEXT: shlq $51, %r11 +; SSE2-NEXT: shlq $52, %r11 ; SSE2-NEXT: sarq $63, %r11 ; SSE2-NEXT: movd %r11d, %xmm4 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: shlq $59, %r14 +; SSE2-NEXT: shlq $53, %r14 ; SSE2-NEXT: sarq $63, %r14 -; SSE2-NEXT: movd %r14d, %xmm5 +; SSE2-NEXT: movd %r14d, %xmm0 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSE2-NEXT: shlq $55, %r15 +; SSE2-NEXT: shlq $54, %r15 ; SSE2-NEXT: sarq $63, %r15 ; SSE2-NEXT: movd %r15d, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-NEXT: shlq $63, %r12 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE2-NEXT: shlq $55, %r12 ; SSE2-NEXT: sarq $63, %r12 -; SSE2-NEXT: movd %r12d, %xmm0 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] -; SSE2-NEXT: shlq $50, %r13 +; SSE2-NEXT: movd %r12d, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; SSE2-NEXT: shlq $60, %r13 ; SSE2-NEXT: sarq $63, %r13 -; SSE2-NEXT: movd %r13d, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: shlq $58, %rbx +; SSE2-NEXT: movd %r13d, %xmm4 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE2-NEXT: shlq $61, %rbx ; SSE2-NEXT: sarq $63, %rbx ; SSE2-NEXT: movd %ebx, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] -; SSE2-NEXT: shlq $54, %rcx +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: shlq $62, %rcx ; SSE2-NEXT: sarq $63, %rcx -; SSE2-NEXT: movd %ecx, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-NEXT: shlq $62, %rdx +; SSE2-NEXT: movd %ecx, %xmm5 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; SSE2-NEXT: shlq $63, %rdx ; SSE2-NEXT: sarq $63, %rdx -; SSE2-NEXT: movd %edx, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE2-NEXT: shlq $52, %rsi +; SSE2-NEXT: movd %edx, %xmm0 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] +; SSE2-NEXT: shlq $58, %rsi ; SSE2-NEXT: sarq $63, %rsi -; SSE2-NEXT: movd %esi, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] -; SSE2-NEXT: shlq $60, %rdi +; SSE2-NEXT: movd %esi, %xmm3 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] +; SSE2-NEXT: shlq $59, %rdi ; SSE2-NEXT: sarq $63, %rdi ; SSE2-NEXT: movd %edi, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] -; SSE2-NEXT: shrq $15, %rbp -; SSE2-NEXT: movd %ebp, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; SSE2-NEXT: shlq $57, %rbp +; SSE2-NEXT: sarq $63, %rbp +; SSE2-NEXT: movd %ebp, %xmm2 ; SSE2-NEXT: shrq $7, %rax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] +; SSE2-NEXT: movd %eax, %xmm3 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %r12 ; SSE2-NEXT: popq %r13 @@ -2640,69 +2640,69 @@ define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone { ; SSSE3-NEXT: movq %rax, %rsi ; SSSE3-NEXT: movq %rax, %rdi ; SSSE3-NEXT: movq %rax, %rbp -; SSSE3-NEXT: shlq $49, %rbp -; SSSE3-NEXT: sarq $63, %rbp +; SSSE3-NEXT: shrq $15, %rbp ; SSSE3-NEXT: movd %ebp, %xmm0 ; SSSE3-NEXT: movq %rax, %rbp ; SSSE3-NEXT: movsbq %al, %rax -; SSSE3-NEXT: shlq $57, %r8 +; SSSE3-NEXT: shlq $49, %r8 ; SSSE3-NEXT: sarq $63, %r8 ; SSSE3-NEXT: movd %r8d, %xmm1 -; SSSE3-NEXT: shlq $53, %r9 +; SSSE3-NEXT: shlq $50, %r9 ; SSSE3-NEXT: sarq $63, %r9 ; SSSE3-NEXT: movd %r9d, %xmm2 -; SSSE3-NEXT: shlq $61, %r10 +; SSSE3-NEXT: shlq $51, %r10 ; SSSE3-NEXT: sarq $63, %r10 ; SSSE3-NEXT: movd %r10d, %xmm3 -; SSSE3-NEXT: shlq $51, %r11 +; SSSE3-NEXT: shlq $52, %r11 ; SSSE3-NEXT: sarq $63, %r11 ; SSSE3-NEXT: movd %r11d, %xmm4 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSSE3-NEXT: shlq $59, %r14 +; SSSE3-NEXT: shlq $53, %r14 ; SSSE3-NEXT: sarq $63, %r14 -; SSSE3-NEXT: movd %r14d, %xmm5 +; SSSE3-NEXT: movd %r14d, %xmm0 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSSE3-NEXT: shlq $55, %r15 +; SSSE3-NEXT: shlq $54, %r15 ; SSSE3-NEXT: sarq $63, %r15 ; SSSE3-NEXT: movd %r15d, %xmm2 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSSE3-NEXT: shlq $63, %r12 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSSE3-NEXT: shlq $55, %r12 ; SSSE3-NEXT: sarq $63, %r12 -; SSSE3-NEXT: movd %r12d, %xmm0 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] -; SSSE3-NEXT: shlq $50, %r13 +; SSSE3-NEXT: movd %r12d, %xmm1 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; SSSE3-NEXT: shlq $60, %r13 ; SSSE3-NEXT: sarq $63, %r13 -; SSSE3-NEXT: movd %r13d, %xmm1 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSSE3-NEXT: shlq $58, %rbx +; SSSE3-NEXT: movd %r13d, %xmm4 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSSE3-NEXT: shlq $61, %rbx ; SSSE3-NEXT: sarq $63, %rbx ; SSSE3-NEXT: movd %ebx, %xmm2 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] -; SSSE3-NEXT: shlq $54, %rcx +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: shlq $62, %rcx ; SSSE3-NEXT: sarq $63, %rcx -; SSSE3-NEXT: movd %ecx, %xmm4 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSSE3-NEXT: shlq $62, %rdx +; SSSE3-NEXT: movd %ecx, %xmm5 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; SSSE3-NEXT: shlq $63, %rdx ; SSSE3-NEXT: sarq $63, %rdx -; SSSE3-NEXT: movd %edx, %xmm3 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSSE3-NEXT: shlq $52, %rsi +; SSSE3-NEXT: movd %edx, %xmm0 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] +; SSSE3-NEXT: shlq $58, %rsi ; SSSE3-NEXT: sarq $63, %rsi -; SSSE3-NEXT: movd %esi, %xmm1 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] -; SSSE3-NEXT: shlq $60, %rdi +; SSSE3-NEXT: movd %esi, %xmm3 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] +; SSSE3-NEXT: shlq $59, %rdi ; SSSE3-NEXT: sarq $63, %rdi ; SSSE3-NEXT: movd %edi, %xmm4 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] -; SSSE3-NEXT: shrq $15, %rbp -; SSSE3-NEXT: movd %ebp, %xmm1 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; SSSE3-NEXT: shlq $57, %rbp +; SSSE3-NEXT: sarq $63, %rbp +; SSSE3-NEXT: movd %ebp, %xmm2 ; SSSE3-NEXT: shrq $7, %rax -; SSSE3-NEXT: movd %eax, %xmm2 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] +; SSSE3-NEXT: movd %eax, %xmm3 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSSE3-NEXT: popq %rbx ; SSSE3-NEXT: popq %r12 ; SSSE3-NEXT: popq %r13 @@ -3002,7 +3002,7 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movzwl (%rdi), %eax ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $14, %ecx +; SSE2-NEXT: shrl $7, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movl %eax, %ecx @@ -3011,21 +3011,21 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { ; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $10, %ecx +; SSE2-NEXT: shrl $5, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $2, %ecx +; SSE2-NEXT: shrl $4, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm2 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $12, %ecx +; SSE2-NEXT: shrl $3, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $4, %ecx +; SSE2-NEXT: shrl $2, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] @@ -3033,18 +3033,18 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm1 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: shrl %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $13, %ecx +; SSE2-NEXT: shrl $11, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $5, %ecx +; SSE2-NEXT: shrl $10, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm2 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] @@ -3053,31 +3053,31 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm3 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl %ecx +; SSE2-NEXT: shrl $8, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm0 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $11, %ecx +; SSE2-NEXT: shrl $13, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm2 ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $3, %ecx +; SSE2-NEXT: shrl $12, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] ; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $7, %ecx +; SSE2-NEXT: shrl $14, %ecx ; SSE2-NEXT: andl $1, %ecx ; SSE2-NEXT: movd %ecx, %xmm2 ; SSE2-NEXT: shrl $15, %eax ; SSE2-NEXT: movzwl %ax, %eax ; SSE2-NEXT: movd %eax, %xmm4 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psllw $15, %xmm0 @@ -3091,7 +3091,7 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movzwl (%rdi), %eax ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $14, %ecx +; SSSE3-NEXT: shrl $7, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movl %eax, %ecx @@ -3100,21 +3100,21 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { ; SSSE3-NEXT: movd %ecx, %xmm1 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $10, %ecx +; SSSE3-NEXT: shrl $5, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $2, %ecx +; SSSE3-NEXT: shrl $4, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm2 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $12, %ecx +; SSSE3-NEXT: shrl $3, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $4, %ecx +; SSSE3-NEXT: shrl $2, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm3 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] @@ -3122,18 +3122,18 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm1 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $8, %ecx +; SSSE3-NEXT: shrl %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $13, %ecx +; SSSE3-NEXT: shrl $11, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $5, %ecx +; SSSE3-NEXT: shrl $10, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm2 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] @@ -3142,31 +3142,31 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm3 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl %ecx +; SSSE3-NEXT: shrl $8, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm0 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $11, %ecx +; SSSE3-NEXT: shrl $13, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm2 ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $3, %ecx +; SSSE3-NEXT: shrl $12, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm3 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] ; SSSE3-NEXT: movl %eax, %ecx -; SSSE3-NEXT: shrl $7, %ecx +; SSSE3-NEXT: shrl $14, %ecx ; SSSE3-NEXT: andl $1, %ecx ; SSSE3-NEXT: movd %ecx, %xmm2 ; SSSE3-NEXT: shrl $15, %eax ; SSSE3-NEXT: movzwl %ax, %eax ; SSSE3-NEXT: movd %eax, %xmm4 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSSE3-NEXT: psllw $15, %xmm0 @@ -3556,162 +3556,162 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone { ; SSE2-NEXT: pushq %r13 ; SSE2-NEXT: pushq %r12 ; SSE2-NEXT: pushq %rbx -; SSE2-NEXT: movswq (%rdi), %rbx -; SSE2-NEXT: movq %rbx, %r10 -; SSE2-NEXT: movq %rbx, %r8 -; SSE2-NEXT: movq %rbx, %r9 -; SSE2-NEXT: movq %rbx, %r11 -; SSE2-NEXT: movq %rbx, %r14 -; SSE2-NEXT: movq %rbx, %r15 -; SSE2-NEXT: movq %rbx, %r12 -; SSE2-NEXT: movq %rbx, %r13 -; SSE2-NEXT: movq %rbx, %rdx -; SSE2-NEXT: movq %rbx, %rsi -; SSE2-NEXT: movq %rbx, %rcx -; SSE2-NEXT: movq %rbx, %rbp -; SSE2-NEXT: movq %rbx, %rax -; SSE2-NEXT: shlq $49, %rax -; SSE2-NEXT: sarq $63, %rax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movq %rbx, %rax -; SSE2-NEXT: shlq $57, %r10 +; SSE2-NEXT: movswq (%rdi), %rax +; SSE2-NEXT: movq %rax, %r10 +; SSE2-NEXT: movq %rax, %r8 +; SSE2-NEXT: movq %rax, %r9 +; SSE2-NEXT: movq %rax, %r11 +; SSE2-NEXT: movq %rax, %r14 +; SSE2-NEXT: movq %rax, %r15 +; SSE2-NEXT: movq %rax, %r12 +; SSE2-NEXT: movq %rax, %r13 +; SSE2-NEXT: movq %rax, %rdx +; SSE2-NEXT: movq %rax, %rsi +; SSE2-NEXT: movq %rax, %rcx +; SSE2-NEXT: movq %rax, %rbp +; SSE2-NEXT: movq %rax, %rbx +; SSE2-NEXT: shrq $15, %rbx +; SSE2-NEXT: movd %ebx, %xmm0 +; SSE2-NEXT: movq %rax, %rbx +; SSE2-NEXT: shlq $49, %r10 ; SSE2-NEXT: sarq $63, %r10 ; SSE2-NEXT: movd %r10d, %xmm15 -; SSE2-NEXT: movq %rbx, %r10 -; SSE2-NEXT: movsbq %bl, %rbx +; SSE2-NEXT: movq %rax, %r10 +; SSE2-NEXT: movsbq %al, %rax ; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] -; SSE2-NEXT: shlq $53, %r8 +; SSE2-NEXT: shlq $50, %r8 ; SSE2-NEXT: sarq $63, %r8 ; SSE2-NEXT: movd %r8d, %xmm8 -; SSE2-NEXT: shlq $61, %r9 +; SSE2-NEXT: shlq $51, %r9 ; SSE2-NEXT: sarq $63, %r9 -; SSE2-NEXT: movd %r9d, %xmm2 -; SSE2-NEXT: shlq $51, %r11 +; SSE2-NEXT: movd %r9d, %xmm3 +; SSE2-NEXT: shlq $52, %r11 ; SSE2-NEXT: sarq $63, %r11 ; SSE2-NEXT: movd %r11d, %xmm9 -; SSE2-NEXT: shlq $59, %r14 +; SSE2-NEXT: shlq $53, %r14 ; SSE2-NEXT: sarq $63, %r14 -; SSE2-NEXT: movd %r14d, %xmm5 -; SSE2-NEXT: shlq $55, %r15 +; SSE2-NEXT: movd %r14d, %xmm6 +; SSE2-NEXT: shlq $54, %r15 ; SSE2-NEXT: sarq $63, %r15 ; SSE2-NEXT: movd %r15d, %xmm10 -; SSE2-NEXT: shlq $63, %r12 +; SSE2-NEXT: shlq $55, %r12 ; SSE2-NEXT: sarq $63, %r12 -; SSE2-NEXT: movd %r12d, %xmm0 -; SSE2-NEXT: shlq $50, %r13 +; SSE2-NEXT: movd %r12d, %xmm2 +; SSE2-NEXT: shlq $60, %r13 ; SSE2-NEXT: sarq $63, %r13 ; SSE2-NEXT: movd %r13d, %xmm11 -; SSE2-NEXT: shlq $58, %rdx +; SSE2-NEXT: shlq $61, %rdx ; SSE2-NEXT: sarq $63, %rdx -; SSE2-NEXT: movd %edx, %xmm4 -; SSE2-NEXT: shlq $54, %rsi +; SSE2-NEXT: movd %edx, %xmm5 +; SSE2-NEXT: shlq $62, %rsi ; SSE2-NEXT: sarq $63, %rsi ; SSE2-NEXT: movd %esi, %xmm12 -; SSE2-NEXT: shlq $62, %rcx +; SSE2-NEXT: shlq $63, %rcx ; SSE2-NEXT: sarq $63, %rcx -; SSE2-NEXT: movd %ecx, %xmm6 -; SSE2-NEXT: shlq $52, %rbp +; SSE2-NEXT: movd %ecx, %xmm0 +; SSE2-NEXT: shlq $58, %rbp ; SSE2-NEXT: sarq $63, %rbp ; SSE2-NEXT: movd %ebp, %xmm13 -; SSE2-NEXT: shlq $60, %rax -; SSE2-NEXT: sarq $63, %rax -; SSE2-NEXT: movd %eax, %xmm7 -; SSE2-NEXT: shrq $15, %r10 -; SSE2-NEXT: movd %r10d, %xmm14 -; SSE2-NEXT: shrq $7, %rbx -; SSE2-NEXT: movd %ebx, %xmm3 -; SSE2-NEXT: movswq 2(%rdi), %rdx -; SSE2-NEXT: movq %rdx, %r8 -; SSE2-NEXT: movq %rdx, %r9 -; SSE2-NEXT: movq %rdx, %r10 -; SSE2-NEXT: movq %rdx, %r11 -; SSE2-NEXT: movq %rdx, %r14 -; SSE2-NEXT: movq %rdx, %r15 -; SSE2-NEXT: movq %rdx, %r12 -; SSE2-NEXT: movq %rdx, %r13 -; SSE2-NEXT: movq %rdx, %rbx -; SSE2-NEXT: movq %rdx, %rax -; SSE2-NEXT: movq %rdx, %rcx -; SSE2-NEXT: movq %rdx, %rsi -; SSE2-NEXT: movq %rdx, %rdi -; SSE2-NEXT: movq %rdx, %rbp -; SSE2-NEXT: shlq $49, %rbp -; SSE2-NEXT: sarq $63, %rbp +; SSE2-NEXT: shlq $59, %rbx +; SSE2-NEXT: sarq $63, %rbx +; SSE2-NEXT: movd %ebx, %xmm7 +; SSE2-NEXT: shlq $57, %r10 +; SSE2-NEXT: sarq $63, %r10 +; SSE2-NEXT: movd %r10d, %xmm4 +; SSE2-NEXT: shrq $7, %rax +; SSE2-NEXT: movd %eax, %xmm14 +; SSE2-NEXT: movswq 2(%rdi), %rsi +; SSE2-NEXT: movq %rsi, %r8 +; SSE2-NEXT: movq %rsi, %r9 +; SSE2-NEXT: movq %rsi, %r10 +; SSE2-NEXT: movq %rsi, %r11 +; SSE2-NEXT: movq %rsi, %r14 +; SSE2-NEXT: movq %rsi, %r15 +; SSE2-NEXT: movq %rsi, %r12 +; SSE2-NEXT: movq %rsi, %r13 +; SSE2-NEXT: movq %rsi, %rbx +; SSE2-NEXT: movq %rsi, %rax +; SSE2-NEXT: movq %rsi, %rcx +; SSE2-NEXT: movq %rsi, %rdx +; SSE2-NEXT: movq %rsi, %rdi +; SSE2-NEXT: movq %rsi, %rbp +; SSE2-NEXT: shrq $15, %rbp ; SSE2-NEXT: movd %ebp, %xmm1 -; SSE2-NEXT: movq %rdx, %rbp -; SSE2-NEXT: movsbq %dl, %rdx -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7] +; SSE2-NEXT: movq %rsi, %rbp +; SSE2-NEXT: movsbq %sil, %rsi +; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7] -; SSE2-NEXT: shlq $57, %r8 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7] +; SSE2-NEXT: shlq $49, %r8 ; SSE2-NEXT: sarq $63, %r8 -; SSE2-NEXT: movd %r8d, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7] -; SSE2-NEXT: shlq $53, %r9 +; SSE2-NEXT: movd %r8d, %xmm3 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3] +; SSE2-NEXT: shlq $50, %r9 ; SSE2-NEXT: sarq $63, %r9 -; SSE2-NEXT: movd %r9d, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] -; SSE2-NEXT: shlq $61, %r10 +; SSE2-NEXT: movd %r9d, %xmm4 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1] +; SSE2-NEXT: shlq $51, %r10 ; SSE2-NEXT: sarq $63, %r10 -; SSE2-NEXT: movd %r10d, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] -; SSE2-NEXT: shlq $51, %r11 +; SSE2-NEXT: movd %r10d, %xmm5 +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: shlq $52, %r11 ; SSE2-NEXT: sarq $63, %r11 -; SSE2-NEXT: movd %r11d, %xmm5 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE2-NEXT: shlq $59, %r14 +; SSE2-NEXT: movd %r11d, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] +; SSE2-NEXT: shlq $53, %r14 ; SSE2-NEXT: sarq $63, %r14 -; SSE2-NEXT: movd %r14d, %xmm6 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] -; SSE2-NEXT: shlq $55, %r15 +; SSE2-NEXT: movd %r14d, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] +; SSE2-NEXT: shlq $54, %r15 ; SSE2-NEXT: sarq $63, %r15 -; SSE2-NEXT: movd %r15d, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; SSE2-NEXT: shlq $63, %r12 +; SSE2-NEXT: movd %r15d, %xmm4 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3] +; SSE2-NEXT: shlq $55, %r12 ; SSE2-NEXT: sarq $63, %r12 -; SSE2-NEXT: movd %r12d, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] -; SSE2-NEXT: shlq $50, %r13 +; SSE2-NEXT: movd %r12d, %xmm3 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSE2-NEXT: shlq $60, %r13 ; SSE2-NEXT: sarq $63, %r13 ; SSE2-NEXT: movd %r13d, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; SSE2-NEXT: shlq $58, %rbx +; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] +; SSE2-NEXT: shlq $61, %rbx ; SSE2-NEXT: sarq $63, %rbx -; SSE2-NEXT: movd %ebx, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] -; SSE2-NEXT: shlq $54, %rax +; SSE2-NEXT: movd %ebx, %xmm4 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSE2-NEXT: shlq $62, %rax ; SSE2-NEXT: sarq $63, %rax -; SSE2-NEXT: movd %eax, %xmm5 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] -; SSE2-NEXT: shlq $62, %rcx +; SSE2-NEXT: movd %eax, %xmm6 +; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] +; SSE2-NEXT: shlq $63, %rcx ; SSE2-NEXT: sarq $63, %rcx -; SSE2-NEXT: movd %ecx, %xmm4 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSE2-NEXT: shlq $52, %rsi -; SSE2-NEXT: sarq $63, %rsi -; SSE2-NEXT: movd %esi, %xmm2 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] -; SSE2-NEXT: shlq $60, %rdi +; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] +; SSE2-NEXT: shlq $58, %rdx +; SSE2-NEXT: sarq $63, %rdx +; SSE2-NEXT: movd %edx, %xmm2 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] +; SSE2-NEXT: shlq $59, %rdi ; SSE2-NEXT: sarq $63, %rdi -; SSE2-NEXT: movd %edi, %xmm3 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSE2-NEXT: shrq $15, %rbp +; SSE2-NEXT: movd %edi, %xmm4 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] +; SSE2-NEXT: shlq $57, %rbp +; SSE2-NEXT: sarq $63, %rbp ; SSE2-NEXT: movd %ebp, %xmm2 -; SSE2-NEXT: shrq $7, %rdx -; SSE2-NEXT: movd %edx, %xmm5 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] +; SSE2-NEXT: shrq $7, %rsi +; SSE2-NEXT: movd %esi, %xmm5 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: popq %r12 ; SSE2-NEXT: popq %r13 @@ -3728,162 +3728,162 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone { ; SSSE3-NEXT: pushq %r13 ; SSSE3-NEXT: pushq %r12 ; SSSE3-NEXT: pushq %rbx -; SSSE3-NEXT: movswq (%rdi), %rbx -; SSSE3-NEXT: movq %rbx, %r10 -; SSSE3-NEXT: movq %rbx, %r8 -; SSSE3-NEXT: movq %rbx, %r9 -; SSSE3-NEXT: movq %rbx, %r11 -; SSSE3-NEXT: movq %rbx, %r14 -; SSSE3-NEXT: movq %rbx, %r15 -; SSSE3-NEXT: movq %rbx, %r12 -; SSSE3-NEXT: movq %rbx, %r13 -; SSSE3-NEXT: movq %rbx, %rdx -; SSSE3-NEXT: movq %rbx, %rsi -; SSSE3-NEXT: movq %rbx, %rcx -; SSSE3-NEXT: movq %rbx, %rbp -; SSSE3-NEXT: movq %rbx, %rax -; SSSE3-NEXT: shlq $49, %rax -; SSSE3-NEXT: sarq $63, %rax -; SSSE3-NEXT: movd %eax, %xmm0 -; SSSE3-NEXT: movq %rbx, %rax -; SSSE3-NEXT: shlq $57, %r10 +; SSSE3-NEXT: movswq (%rdi), %rax +; SSSE3-NEXT: movq %rax, %r10 +; SSSE3-NEXT: movq %rax, %r8 +; SSSE3-NEXT: movq %rax, %r9 +; SSSE3-NEXT: movq %rax, %r11 +; SSSE3-NEXT: movq %rax, %r14 +; SSSE3-NEXT: movq %rax, %r15 +; SSSE3-NEXT: movq %rax, %r12 +; SSSE3-NEXT: movq %rax, %r13 +; SSSE3-NEXT: movq %rax, %rdx +; SSSE3-NEXT: movq %rax, %rsi +; SSSE3-NEXT: movq %rax, %rcx +; SSSE3-NEXT: movq %rax, %rbp +; SSSE3-NEXT: movq %rax, %rbx +; SSSE3-NEXT: shrq $15, %rbx +; SSSE3-NEXT: movd %ebx, %xmm0 +; SSSE3-NEXT: movq %rax, %rbx +; SSSE3-NEXT: shlq $49, %r10 ; SSSE3-NEXT: sarq $63, %r10 ; SSSE3-NEXT: movd %r10d, %xmm15 -; SSSE3-NEXT: movq %rbx, %r10 -; SSSE3-NEXT: movsbq %bl, %rbx +; SSSE3-NEXT: movq %rax, %r10 +; SSSE3-NEXT: movsbq %al, %rax ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] -; SSSE3-NEXT: shlq $53, %r8 +; SSSE3-NEXT: shlq $50, %r8 ; SSSE3-NEXT: sarq $63, %r8 ; SSSE3-NEXT: movd %r8d, %xmm8 -; SSSE3-NEXT: shlq $61, %r9 +; SSSE3-NEXT: shlq $51, %r9 ; SSSE3-NEXT: sarq $63, %r9 -; SSSE3-NEXT: movd %r9d, %xmm2 -; SSSE3-NEXT: shlq $51, %r11 +; SSSE3-NEXT: movd %r9d, %xmm3 +; SSSE3-NEXT: shlq $52, %r11 ; SSSE3-NEXT: sarq $63, %r11 ; SSSE3-NEXT: movd %r11d, %xmm9 -; SSSE3-NEXT: shlq $59, %r14 +; SSSE3-NEXT: shlq $53, %r14 ; SSSE3-NEXT: sarq $63, %r14 -; SSSE3-NEXT: movd %r14d, %xmm5 -; SSSE3-NEXT: shlq $55, %r15 +; SSSE3-NEXT: movd %r14d, %xmm6 +; SSSE3-NEXT: shlq $54, %r15 ; SSSE3-NEXT: sarq $63, %r15 ; SSSE3-NEXT: movd %r15d, %xmm10 -; SSSE3-NEXT: shlq $63, %r12 +; SSSE3-NEXT: shlq $55, %r12 ; SSSE3-NEXT: sarq $63, %r12 -; SSSE3-NEXT: movd %r12d, %xmm0 -; SSSE3-NEXT: shlq $50, %r13 +; SSSE3-NEXT: movd %r12d, %xmm2 +; SSSE3-NEXT: shlq $60, %r13 ; SSSE3-NEXT: sarq $63, %r13 ; SSSE3-NEXT: movd %r13d, %xmm11 -; SSSE3-NEXT: shlq $58, %rdx +; SSSE3-NEXT: shlq $61, %rdx ; SSSE3-NEXT: sarq $63, %rdx -; SSSE3-NEXT: movd %edx, %xmm4 -; SSSE3-NEXT: shlq $54, %rsi +; SSSE3-NEXT: movd %edx, %xmm5 +; SSSE3-NEXT: shlq $62, %rsi ; SSSE3-NEXT: sarq $63, %rsi ; SSSE3-NEXT: movd %esi, %xmm12 -; SSSE3-NEXT: shlq $62, %rcx +; SSSE3-NEXT: shlq $63, %rcx ; SSSE3-NEXT: sarq $63, %rcx -; SSSE3-NEXT: movd %ecx, %xmm6 -; SSSE3-NEXT: shlq $52, %rbp +; SSSE3-NEXT: movd %ecx, %xmm0 +; SSSE3-NEXT: shlq $58, %rbp ; SSSE3-NEXT: sarq $63, %rbp ; SSSE3-NEXT: movd %ebp, %xmm13 -; SSSE3-NEXT: shlq $60, %rax -; SSSE3-NEXT: sarq $63, %rax -; SSSE3-NEXT: movd %eax, %xmm7 -; SSSE3-NEXT: shrq $15, %r10 -; SSSE3-NEXT: movd %r10d, %xmm14 -; SSSE3-NEXT: shrq $7, %rbx -; SSSE3-NEXT: movd %ebx, %xmm3 -; SSSE3-NEXT: movswq 2(%rdi), %rdx -; SSSE3-NEXT: movq %rdx, %r8 -; SSSE3-NEXT: movq %rdx, %r9 -; SSSE3-NEXT: movq %rdx, %r10 -; SSSE3-NEXT: movq %rdx, %r11 -; SSSE3-NEXT: movq %rdx, %r14 -; SSSE3-NEXT: movq %rdx, %r15 -; SSSE3-NEXT: movq %rdx, %r12 -; SSSE3-NEXT: movq %rdx, %r13 -; SSSE3-NEXT: movq %rdx, %rbx -; SSSE3-NEXT: movq %rdx, %rax -; SSSE3-NEXT: movq %rdx, %rcx -; SSSE3-NEXT: movq %rdx, %rsi -; SSSE3-NEXT: movq %rdx, %rdi -; SSSE3-NEXT: movq %rdx, %rbp -; SSSE3-NEXT: shlq $49, %rbp -; SSSE3-NEXT: sarq $63, %rbp +; SSSE3-NEXT: shlq $59, %rbx +; SSSE3-NEXT: sarq $63, %rbx +; SSSE3-NEXT: movd %ebx, %xmm7 +; SSSE3-NEXT: shlq $57, %r10 +; SSSE3-NEXT: sarq $63, %r10 +; SSSE3-NEXT: movd %r10d, %xmm4 +; SSSE3-NEXT: shrq $7, %rax +; SSSE3-NEXT: movd %eax, %xmm14 +; SSSE3-NEXT: movswq 2(%rdi), %rsi +; SSSE3-NEXT: movq %rsi, %r8 +; SSSE3-NEXT: movq %rsi, %r9 +; SSSE3-NEXT: movq %rsi, %r10 +; SSSE3-NEXT: movq %rsi, %r11 +; SSSE3-NEXT: movq %rsi, %r14 +; SSSE3-NEXT: movq %rsi, %r15 +; SSSE3-NEXT: movq %rsi, %r12 +; SSSE3-NEXT: movq %rsi, %r13 +; SSSE3-NEXT: movq %rsi, %rbx +; SSSE3-NEXT: movq %rsi, %rax +; SSSE3-NEXT: movq %rsi, %rcx +; SSSE3-NEXT: movq %rsi, %rdx +; SSSE3-NEXT: movq %rsi, %rdi +; SSSE3-NEXT: movq %rsi, %rbp +; SSSE3-NEXT: shrq $15, %rbp ; SSSE3-NEXT: movd %ebp, %xmm1 -; SSSE3-NEXT: movq %rdx, %rbp -; SSSE3-NEXT: movsbq %dl, %rdx -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7] +; SSSE3-NEXT: movq %rsi, %rbp +; SSSE3-NEXT: movsbq %sil, %rsi +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm10[0],xmm2[1],xmm10[1],xmm2[2],xmm10[2],xmm2[3],xmm10[3],xmm2[4],xmm10[4],xmm2[5],xmm10[5],xmm2[6],xmm10[6],xmm2[7],xmm10[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7] -; SSSE3-NEXT: shlq $57, %r8 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7] +; SSSE3-NEXT: shlq $49, %r8 ; SSSE3-NEXT: sarq $63, %r8 -; SSSE3-NEXT: movd %r8d, %xmm2 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7] -; SSSE3-NEXT: shlq $53, %r9 +; SSSE3-NEXT: movd %r8d, %xmm3 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3] +; SSSE3-NEXT: shlq $50, %r9 ; SSSE3-NEXT: sarq $63, %r9 -; SSSE3-NEXT: movd %r9d, %xmm3 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] -; SSSE3-NEXT: shlq $61, %r10 +; SSSE3-NEXT: movd %r9d, %xmm4 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1] +; SSSE3-NEXT: shlq $51, %r10 ; SSSE3-NEXT: sarq $63, %r10 -; SSSE3-NEXT: movd %r10d, %xmm4 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] -; SSSE3-NEXT: shlq $51, %r11 +; SSSE3-NEXT: movd %r10d, %xmm5 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSSE3-NEXT: shlq $52, %r11 ; SSSE3-NEXT: sarq $63, %r11 -; SSSE3-NEXT: movd %r11d, %xmm5 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSSE3-NEXT: shlq $59, %r14 +; SSSE3-NEXT: movd %r11d, %xmm2 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] +; SSSE3-NEXT: shlq $53, %r14 ; SSSE3-NEXT: sarq $63, %r14 -; SSSE3-NEXT: movd %r14d, %xmm6 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] -; SSSE3-NEXT: shlq $55, %r15 +; SSSE3-NEXT: movd %r14d, %xmm1 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] +; SSSE3-NEXT: shlq $54, %r15 ; SSSE3-NEXT: sarq $63, %r15 -; SSSE3-NEXT: movd %r15d, %xmm3 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; SSSE3-NEXT: shlq $63, %r12 +; SSSE3-NEXT: movd %r15d, %xmm4 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3] +; SSSE3-NEXT: shlq $55, %r12 ; SSSE3-NEXT: sarq $63, %r12 -; SSSE3-NEXT: movd %r12d, %xmm1 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] -; SSSE3-NEXT: shlq $50, %r13 +; SSSE3-NEXT: movd %r12d, %xmm3 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; SSSE3-NEXT: shlq $60, %r13 ; SSSE3-NEXT: sarq $63, %r13 ; SSSE3-NEXT: movd %r13d, %xmm2 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] -; SSSE3-NEXT: shlq $58, %rbx +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] +; SSSE3-NEXT: shlq $61, %rbx ; SSSE3-NEXT: sarq $63, %rbx -; SSSE3-NEXT: movd %ebx, %xmm3 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] -; SSSE3-NEXT: shlq $54, %rax +; SSSE3-NEXT: movd %ebx, %xmm4 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; SSSE3-NEXT: shlq $62, %rax ; SSSE3-NEXT: sarq $63, %rax -; SSSE3-NEXT: movd %eax, %xmm5 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] -; SSSE3-NEXT: shlq $62, %rcx +; SSSE3-NEXT: movd %eax, %xmm6 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] +; SSSE3-NEXT: shlq $63, %rcx ; SSSE3-NEXT: sarq $63, %rcx -; SSSE3-NEXT: movd %ecx, %xmm4 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSSE3-NEXT: shlq $52, %rsi -; SSSE3-NEXT: sarq $63, %rsi -; SSSE3-NEXT: movd %esi, %xmm2 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] -; SSSE3-NEXT: shlq $60, %rdi +; SSSE3-NEXT: movd %ecx, %xmm1 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] +; SSSE3-NEXT: shlq $58, %rdx +; SSSE3-NEXT: sarq $63, %rdx +; SSSE3-NEXT: movd %edx, %xmm2 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] +; SSSE3-NEXT: shlq $59, %rdi ; SSSE3-NEXT: sarq $63, %rdi -; SSSE3-NEXT: movd %edi, %xmm3 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; SSSE3-NEXT: shrq $15, %rbp +; SSSE3-NEXT: movd %edi, %xmm4 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] +; SSSE3-NEXT: shlq $57, %rbp +; SSSE3-NEXT: sarq $63, %rbp ; SSSE3-NEXT: movd %ebp, %xmm2 -; SSSE3-NEXT: shrq $7, %rdx -; SSSE3-NEXT: movd %edx, %xmm5 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] +; SSSE3-NEXT: shrq $7, %rsi +; SSSE3-NEXT: movd %esi, %xmm5 +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; SSSE3-NEXT: popq %rbx ; SSSE3-NEXT: popq %r12 ; SSSE3-NEXT: popq %r13 diff --git a/test/CodeGen/X86/vector-shuffle-v48.ll b/test/CodeGen/X86/vector-shuffle-v48.ll new file mode 100644 index 000000000000..9bd75148ecd1 --- /dev/null +++ b/test/CodeGen/X86/vector-shuffle-v48.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-pc-linux -mattr=+avx2 < %s | FileCheck %s +define <16 x i8> @foo(<48 x i8>* %x0, <16 x i32> %x1, <16 x i32> %x2) { +; CHECK-LABEL: foo: +; CHECK: # BB#0: +; CHECK-NEXT: vmovdqu (%rdi), %ymm4 +; CHECK-NEXT: vmovdqu 32(%rdi), %xmm5 +; CHECK-NEXT: vpextrb $13, %xmm5, %eax +; CHECK-NEXT: vpextrb $10, %xmm5, %ecx +; CHECK-NEXT: vpextrb $7, %xmm5, %edx +; CHECK-NEXT: vpextrb $4, %xmm5, %esi +; CHECK-NEXT: vpextrb $1, %xmm5, %edi +; CHECK-NEXT: vextracti128 $1, %ymm4, %xmm5 +; CHECK-NEXT: vpshufb {{.*#+}} xmm6 = xmm5[2,2,5,5,5,5,3,3,4,4,5,5,6,6,7,7] +; CHECK-NEXT: vpshufb {{.*#+}} xmm7 = xmm4[12,12,13,13,15,15,15,15,12,12,13,13,14,14,15,15] +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm6 = xmm7[0],xmm6[0] +; CHECK-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[0,0,1,1,3,3,3,3,6,6,9,9,9,9,7,7] +; CHECK-NEXT: vinserti128 $1, %xmm6, %ymm4, %ymm4 +; CHECK-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4 +; CHECK-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[8,11,14],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vpinsrb $3, %edi, %xmm5, %xmm5 +; CHECK-NEXT: vpinsrb $4, %esi, %xmm5, %xmm5 +; CHECK-NEXT: vpinsrb $5, %edx, %xmm5, %xmm5 +; CHECK-NEXT: vpinsrb $6, %ecx, %xmm5, %xmm5 +; CHECK-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5 +; CHECK-NEXT: vpmovzxbd {{.*#+}} ymm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero,xmm5[4],zero,zero,zero,xmm5[5],zero,zero,zero,xmm5[6],zero,zero,zero,xmm5[7],zero,zero,zero +; CHECK-NEXT: vpmulld %ymm0, %ymm4, %ymm0 +; CHECK-NEXT: vpmulld %ymm1, %ymm5, %ymm1 +; CHECK-NEXT: vpsrlvd %ymm2, %ymm0, %ymm0 +; CHECK-NEXT: vpsrlvd %ymm3, %ymm1, %ymm1 +; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] +; CHECK-NEXT: vpshufb %ymm2, %ymm0, %ymm0 +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; CHECK-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; CHECK-NEXT: vpshufb %ymm2, %ymm1, %ymm1 +; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] +; CHECK-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %1 = load <48 x i8>, <48 x i8>* %x0, align 1 + %2 = shufflevector <48 x i8> %1, <48 x i8> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> + %3 = zext <16 x i8> %2 to <16 x i32> + %4 = mul <16 x i32> %3, %x1 + %5 = lshr <16 x i32> %4, %x2 + %6 = trunc <16 x i32> %5 to <16 x i8> + ret <16 x i8> %6 +} diff --git a/test/CodeGen/X86/vector-shuffle-variable-128.ll b/test/CodeGen/X86/vector-shuffle-variable-128.ll index bde8a16d2a5a..452f387a4fee 100644 --- a/test/CodeGen/X86/vector-shuffle-variable-128.ll +++ b/test/CodeGen/X86/vector-shuffle-variable-128.ll @@ -83,7 +83,7 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32: @@ -103,7 +103,7 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32: @@ -168,7 +168,7 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i ; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32: @@ -188,7 +188,7 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i ; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32: @@ -257,27 +257,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1 ; SSE2-NEXT: andl $7, %eax ; SSE2-NEXT: movzwl -24(%rsp,%rax,2), %eax ; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movzwl -24(%rsp,%rcx,2), %eax +; SSE2-NEXT: movzwl -24(%rsp,%r10,2), %eax ; SSE2-NEXT: movd %eax, %xmm1 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; SSE2-NEXT: movzwl -24(%rsp,%r9,2), %eax ; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax +; SSE2-NEXT: movzwl -24(%rsp,%r8,2), %eax ; SSE2-NEXT: movd %eax, %xmm2 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSE2-NEXT: movzwl -24(%rsp,%r10,2), %eax +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: movzwl -24(%rsp,%rcx,2), %eax ; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: movzwl -24(%rsp,%rdx,2), %eax ; SSE2-NEXT: movd %eax, %xmm1 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: movzwl -24(%rsp,%r8,2), %eax +; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax ; SSE2-NEXT: movd %eax, %xmm3 ; SSE2-NEXT: movzwl -24(%rsp,%rdi,2), %eax ; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16: @@ -301,27 +301,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1 ; SSSE3-NEXT: andl $7, %eax ; SSSE3-NEXT: movzwl -24(%rsp,%rax,2), %eax ; SSSE3-NEXT: movd %eax, %xmm0 -; SSSE3-NEXT: movzwl -24(%rsp,%rcx,2), %eax +; SSSE3-NEXT: movzwl -24(%rsp,%r10,2), %eax ; SSSE3-NEXT: movd %eax, %xmm1 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; SSSE3-NEXT: movzwl -24(%rsp,%r9,2), %eax ; SSSE3-NEXT: movd %eax, %xmm0 -; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax +; SSSE3-NEXT: movzwl -24(%rsp,%r8,2), %eax ; SSSE3-NEXT: movd %eax, %xmm2 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSSE3-NEXT: movzwl -24(%rsp,%r10,2), %eax +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSSE3-NEXT: movzwl -24(%rsp,%rcx,2), %eax ; SSSE3-NEXT: movd %eax, %xmm0 ; SSSE3-NEXT: movzwl -24(%rsp,%rdx,2), %eax ; SSSE3-NEXT: movd %eax, %xmm1 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSSE3-NEXT: movzwl -24(%rsp,%r8,2), %eax +; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax ; SSSE3-NEXT: movd %eax, %xmm3 ; SSSE3-NEXT: movzwl -24(%rsp,%rdi,2), %eax ; SSSE3-NEXT: movd %eax, %xmm0 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16: @@ -425,67 +425,67 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 % ; SSE2-NEXT: andl $15, %eax ; SSE2-NEXT: movzbl (%rax,%r10), %eax ; SSE2-NEXT: movd %eax, %xmm9 -; SSE2-NEXT: andl $15, %ecx -; SSE2-NEXT: movzbl (%rcx,%r10), %eax +; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; SSE2-NEXT: andl $15, %eax +; SSE2-NEXT: movzbl (%rax,%r10), %eax ; SSE2-NEXT: movd %eax, %xmm3 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; SSE2-NEXT: andl $15, %eax ; SSE2-NEXT: movzbl (%rax,%r10), %eax ; SSE2-NEXT: movd %eax, %xmm10 -; SSE2-NEXT: andl $15, %r9d -; SSE2-NEXT: movzbl (%r9,%r10), %eax -; SSE2-NEXT: movd %eax, %xmm7 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; SSE2-NEXT: andl $15, %eax ; SSE2-NEXT: movzbl (%rax,%r10), %eax -; SSE2-NEXT: movd %eax, %xmm11 -; SSE2-NEXT: andl $15, %esi -; SSE2-NEXT: movzbl (%rsi,%r10), %eax -; SSE2-NEXT: movd %eax, %xmm6 +; SSE2-NEXT: movd %eax, %xmm7 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; SSE2-NEXT: andl $15, %eax ; SSE2-NEXT: movzbl (%rax,%r10), %eax -; SSE2-NEXT: movd %eax, %xmm12 +; SSE2-NEXT: movd %eax, %xmm11 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; SSE2-NEXT: andl $15, %eax ; SSE2-NEXT: movzbl (%rax,%r10), %eax -; SSE2-NEXT: movd %eax, %xmm5 +; SSE2-NEXT: movd %eax, %xmm6 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; SSE2-NEXT: andl $15, %eax ; SSE2-NEXT: movzbl (%rax,%r10), %eax -; SSE2-NEXT: movd %eax, %xmm13 -; SSE2-NEXT: andl $15, %edx -; SSE2-NEXT: movzbl (%rdx,%r10), %eax -; SSE2-NEXT: movd %eax, %xmm4 +; SSE2-NEXT: movd %eax, %xmm12 ; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; SSE2-NEXT: andl $15, %eax ; SSE2-NEXT: movzbl (%rax,%r10), %eax -; SSE2-NEXT: movd %eax, %xmm14 +; SSE2-NEXT: movd %eax, %xmm5 +; SSE2-NEXT: andl $15, %r9d +; SSE2-NEXT: movzbl (%r9,%r10), %eax +; SSE2-NEXT: movd %eax, %xmm13 ; SSE2-NEXT: andl $15, %r8d ; SSE2-NEXT: movzbl (%r8,%r10), %eax +; SSE2-NEXT: movd %eax, %xmm4 +; SSE2-NEXT: andl $15, %ecx +; SSE2-NEXT: movzbl (%rcx,%r10), %eax +; SSE2-NEXT: movd %eax, %xmm14 +; SSE2-NEXT: andl $15, %edx +; SSE2-NEXT: movzbl (%rdx,%r10), %eax ; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: andl $15, %eax -; SSE2-NEXT: movzbl (%rax,%r10), %eax +; SSE2-NEXT: andl $15, %esi +; SSE2-NEXT: movzbl (%rsi,%r10), %eax ; SSE2-NEXT: movd %eax, %xmm2 ; SSE2-NEXT: andl $15, %edi ; SSE2-NEXT: movzbl (%rdi,%r10), %eax ; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8: @@ -510,67 +510,67 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 % ; SSSE3-NEXT: andl $15, %eax ; SSSE3-NEXT: movzbl (%rax,%r10), %eax ; SSSE3-NEXT: movd %eax, %xmm9 -; SSSE3-NEXT: andl $15, %ecx -; SSSE3-NEXT: movzbl (%rcx,%r10), %eax +; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; SSSE3-NEXT: andl $15, %eax +; SSSE3-NEXT: movzbl (%rax,%r10), %eax ; SSSE3-NEXT: movd %eax, %xmm3 ; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; SSSE3-NEXT: andl $15, %eax ; SSSE3-NEXT: movzbl (%rax,%r10), %eax ; SSSE3-NEXT: movd %eax, %xmm10 -; SSSE3-NEXT: andl $15, %r9d -; SSSE3-NEXT: movzbl (%r9,%r10), %eax -; SSSE3-NEXT: movd %eax, %xmm7 ; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; SSSE3-NEXT: andl $15, %eax ; SSSE3-NEXT: movzbl (%rax,%r10), %eax -; SSSE3-NEXT: movd %eax, %xmm11 -; SSSE3-NEXT: andl $15, %esi -; SSSE3-NEXT: movzbl (%rsi,%r10), %eax -; SSSE3-NEXT: movd %eax, %xmm6 +; SSSE3-NEXT: movd %eax, %xmm7 ; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; SSSE3-NEXT: andl $15, %eax ; SSSE3-NEXT: movzbl (%rax,%r10), %eax -; SSSE3-NEXT: movd %eax, %xmm12 +; SSSE3-NEXT: movd %eax, %xmm11 ; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; SSSE3-NEXT: andl $15, %eax ; SSSE3-NEXT: movzbl (%rax,%r10), %eax -; SSSE3-NEXT: movd %eax, %xmm5 +; SSSE3-NEXT: movd %eax, %xmm6 ; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; SSSE3-NEXT: andl $15, %eax ; SSSE3-NEXT: movzbl (%rax,%r10), %eax -; SSSE3-NEXT: movd %eax, %xmm13 -; SSSE3-NEXT: andl $15, %edx -; SSSE3-NEXT: movzbl (%rdx,%r10), %eax -; SSSE3-NEXT: movd %eax, %xmm4 +; SSSE3-NEXT: movd %eax, %xmm12 ; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; SSSE3-NEXT: andl $15, %eax ; SSSE3-NEXT: movzbl (%rax,%r10), %eax -; SSSE3-NEXT: movd %eax, %xmm14 +; SSSE3-NEXT: movd %eax, %xmm5 +; SSSE3-NEXT: andl $15, %r9d +; SSSE3-NEXT: movzbl (%r9,%r10), %eax +; SSSE3-NEXT: movd %eax, %xmm13 ; SSSE3-NEXT: andl $15, %r8d ; SSSE3-NEXT: movzbl (%r8,%r10), %eax +; SSSE3-NEXT: movd %eax, %xmm4 +; SSSE3-NEXT: andl $15, %ecx +; SSSE3-NEXT: movzbl (%rcx,%r10), %eax +; SSSE3-NEXT: movd %eax, %xmm14 +; SSSE3-NEXT: andl $15, %edx +; SSSE3-NEXT: movzbl (%rdx,%r10), %eax ; SSSE3-NEXT: movd %eax, %xmm1 -; SSSE3-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; SSSE3-NEXT: andl $15, %eax -; SSSE3-NEXT: movzbl (%rax,%r10), %eax +; SSSE3-NEXT: andl $15, %esi +; SSSE3-NEXT: movzbl (%rsi,%r10), %eax ; SSSE3-NEXT: movd %eax, %xmm2 ; SSSE3-NEXT: andl $15, %edi ; SSSE3-NEXT: movzbl (%rdi,%r10), %eax ; SSSE3-NEXT: movd %eax, %xmm0 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8: @@ -739,7 +739,7 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi ; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32: @@ -759,7 +759,7 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi ; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32: @@ -824,23 +824,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8* ; SSE2-NEXT: leaq -{{[0-9]+}}(%rsp), %rcx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm8 -; SSE2-NEXT: movzbl 7(%rdi), %edx +; SSE2-NEXT: movzbl 14(%rdi), %edx ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm15 -; SSE2-NEXT: movzbl 11(%rdi), %edx +; SSE2-NEXT: movzbl 13(%rdi), %edx ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm9 -; SSE2-NEXT: movzbl 3(%rdi), %edx +; SSE2-NEXT: movzbl 12(%rdi), %edx ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm3 -; SSE2-NEXT: movzbl 13(%rdi), %edx +; SSE2-NEXT: movzbl 11(%rdi), %edx ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm10 -; SSE2-NEXT: movzbl 5(%rdi), %edx +; SSE2-NEXT: movzbl 10(%rdi), %edx ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm7 @@ -848,11 +848,11 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8* ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm11 -; SSE2-NEXT: movzbl 1(%rdi), %edx +; SSE2-NEXT: movzbl 8(%rdi), %edx ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm6 -; SSE2-NEXT: movzbl 14(%rdi), %edx +; SSE2-NEXT: movzbl 7(%rdi), %edx ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm12 @@ -860,23 +860,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8* ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm5 -; SSE2-NEXT: movzbl 10(%rdi), %edx +; SSE2-NEXT: movzbl 5(%rdi), %edx ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm13 -; SSE2-NEXT: movzbl 2(%rdi), %edx +; SSE2-NEXT: movzbl 4(%rdi), %edx ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm4 -; SSE2-NEXT: movzbl 12(%rdi), %edx +; SSE2-NEXT: movzbl 3(%rdi), %edx ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm14 -; SSE2-NEXT: movzbl 4(%rdi), %edx +; SSE2-NEXT: movzbl 2(%rdi), %edx ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm1 -; SSE2-NEXT: movzbl 8(%rdi), %edx +; SSE2-NEXT: movzbl 1(%rdi), %edx ; SSE2-NEXT: andl $15, %edx ; SSE2-NEXT: movzbl (%rdx,%rcx), %edx ; SSE2-NEXT: movd %edx, %xmm2 @@ -885,19 +885,19 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8* ; SSE2-NEXT: movd %eax, %xmm0 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8: @@ -909,23 +909,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8* ; SSSE3-NEXT: leaq -{{[0-9]+}}(%rsp), %rcx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm8 -; SSSE3-NEXT: movzbl 7(%rdi), %edx +; SSSE3-NEXT: movzbl 14(%rdi), %edx ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm15 -; SSSE3-NEXT: movzbl 11(%rdi), %edx +; SSSE3-NEXT: movzbl 13(%rdi), %edx ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm9 -; SSSE3-NEXT: movzbl 3(%rdi), %edx +; SSSE3-NEXT: movzbl 12(%rdi), %edx ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm3 -; SSSE3-NEXT: movzbl 13(%rdi), %edx +; SSSE3-NEXT: movzbl 11(%rdi), %edx ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm10 -; SSSE3-NEXT: movzbl 5(%rdi), %edx +; SSSE3-NEXT: movzbl 10(%rdi), %edx ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm7 @@ -933,11 +933,11 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8* ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm11 -; SSSE3-NEXT: movzbl 1(%rdi), %edx +; SSSE3-NEXT: movzbl 8(%rdi), %edx ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm6 -; SSSE3-NEXT: movzbl 14(%rdi), %edx +; SSSE3-NEXT: movzbl 7(%rdi), %edx ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm12 @@ -945,23 +945,23 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8* ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm5 -; SSSE3-NEXT: movzbl 10(%rdi), %edx +; SSSE3-NEXT: movzbl 5(%rdi), %edx ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm13 -; SSSE3-NEXT: movzbl 2(%rdi), %edx +; SSSE3-NEXT: movzbl 4(%rdi), %edx ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm4 -; SSSE3-NEXT: movzbl 12(%rdi), %edx +; SSSE3-NEXT: movzbl 3(%rdi), %edx ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm14 -; SSSE3-NEXT: movzbl 4(%rdi), %edx +; SSSE3-NEXT: movzbl 2(%rdi), %edx ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm1 -; SSSE3-NEXT: movzbl 8(%rdi), %edx +; SSSE3-NEXT: movzbl 1(%rdi), %edx ; SSSE3-NEXT: andl $15, %edx ; SSSE3-NEXT: movzbl (%rdx,%rcx), %edx ; SSSE3-NEXT: movd %edx, %xmm2 @@ -970,19 +970,19 @@ define <16 x i8> @mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8* ; SSSE3-NEXT: movd %eax, %xmm0 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm9[0],xmm3[1],xmm9[1],xmm3[2],xmm9[2],xmm3[3],xmm9[3],xmm3[4],xmm9[4],xmm3[5],xmm9[5],xmm3[6],xmm9[6],xmm3[7],xmm9[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3],xmm3[4],xmm15[4],xmm3[5],xmm15[5],xmm3[6],xmm15[6],xmm3[7],xmm15[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm11[0],xmm6[1],xmm11[1],xmm6[2],xmm11[2],xmm6[3],xmm11[3],xmm6[4],xmm11[4],xmm6[5],xmm11[5],xmm6[6],xmm11[6],xmm6[7],xmm11[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1],xmm6[2],xmm3[2],xmm6[3],xmm3[3],xmm6[4],xmm3[4],xmm6[5],xmm3[5],xmm6[6],xmm3[6],xmm6[7],xmm3[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm13[0],xmm4[1],xmm13[1],xmm4[2],xmm13[2],xmm4[3],xmm13[3],xmm4[4],xmm13[4],xmm4[5],xmm13[5],xmm4[6],xmm13[6],xmm4[7],xmm13[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7] ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: mem_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8: @@ -1225,28 +1225,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> % ; SSE2-NEXT: andl $7, %ecx ; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE2-NEXT: andl $7, %r8d -; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) ; SSE2-NEXT: andl $7, %r9d ; SSE2-NEXT: movzwl -24(%rsp,%rcx,2), %eax ; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-NEXT: movzwl -24(%rsp,%r9,2), %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] ; SSE2-NEXT: movzwl -40(%rsp,%rdx,2), %eax -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSE2-NEXT: movzwl -40(%rsp,%r8,2), %eax ; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: movzwl -24(%rsp,%rsi,2), %eax +; SSE2-NEXT: movd %eax, %xmm2 ; SSE2-NEXT: movzwl -40(%rsp,%rdi,2), %eax ; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movzwl -24(%rsp,%r9,2), %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: movzwl -40(%rsp,%r8,2), %eax +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSSE3-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16: @@ -1263,28 +1262,27 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> % ; SSSE3-NEXT: andl $7, %ecx ; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSSE3-NEXT: andl $7, %r8d -; SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) +; SSSE3-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) ; SSSE3-NEXT: andl $7, %r9d ; SSSE3-NEXT: movzwl -24(%rsp,%rcx,2), %eax ; SSSE3-NEXT: movd %eax, %xmm0 -; SSSE3-NEXT: pxor %xmm1, %xmm1 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSSE3-NEXT: movzwl -24(%rsp,%r9,2), %eax -; SSSE3-NEXT: movd %eax, %xmm2 -; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax -; SSSE3-NEXT: movd %eax, %xmm3 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] ; SSSE3-NEXT: movzwl -40(%rsp,%rdx,2), %eax -; SSSE3-NEXT: movd %eax, %xmm2 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; SSSE3-NEXT: movzwl -40(%rsp,%r8,2), %eax ; SSSE3-NEXT: movd %eax, %xmm1 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSSE3-NEXT: movzwl -24(%rsp,%rsi,2), %eax +; SSSE3-NEXT: movd %eax, %xmm2 ; SSSE3-NEXT: movzwl -40(%rsp,%rdi,2), %eax ; SSSE3-NEXT: movd %eax, %xmm0 -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: movzwl -24(%rsp,%r9,2), %eax +; SSSE3-NEXT: movd %eax, %xmm1 +; SSSE3-NEXT: movzwl -40(%rsp,%r8,2), %eax +; SSSE3-NEXT: movd %eax, %xmm2 +; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSSE3-NEXT: pxor %xmm1, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSSE3-NEXT: retq ; ; SSE41-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16: diff --git a/test/CodeGen/X86/vector-sqrt.ll b/test/CodeGen/X86/vector-sqrt.ll index 13088b7fa5f2..c5ac4466b5fa 100644 --- a/test/CodeGen/X86/vector-sqrt.ll +++ b/test/CodeGen/X86/vector-sqrt.ll @@ -5,10 +5,8 @@ define <2 x double> @sqrtd2(double* nocapture readonly %v) local_unnamed_addr #0 { ; CHECK-LABEL: sqrtd2: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: vsqrtsd 8(%rdi), %xmm1, %xmm1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq entry: @@ -29,14 +27,10 @@ declare double @sqrt(double) local_unnamed_addr #1 define <4 x float> @sqrtf4(float* nocapture readonly %v) local_unnamed_addr #0 { ; CHECK-LABEL: sqrtf4: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vsqrtss %xmm2, %xmm2, %xmm2 -; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vsqrtss %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vsqrtss (%rdi), %xmm0, %xmm0 +; CHECK-NEXT: vsqrtss 4(%rdi), %xmm1, %xmm1 +; CHECK-NEXT: vsqrtss 8(%rdi), %xmm2, %xmm2 +; CHECK-NEXT: vsqrtss 12(%rdi), %xmm3, %xmm3 ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0] diff --git a/test/CodeGen/X86/vector-unsigned-cmp.ll b/test/CodeGen/X86/vector-unsigned-cmp.ll index fc246669992c..3e4b9aedf2b8 100644 --- a/test/CodeGen/X86/vector-unsigned-cmp.ll +++ b/test/CodeGen/X86/vector-unsigned-cmp.ll @@ -13,7 +13,7 @@ define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE: # BB#0: ; SSE-NEXT: psrlq $1, %xmm0 ; SSE-NEXT: psrlq $1, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] ; SSE-NEXT: pxor %xmm2, %xmm1 ; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: movdqa %xmm0, %xmm2 @@ -30,9 +30,6 @@ define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) { ; AVX: # BB#0: ; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %sh1 = lshr <2 x i64> %x, <i64 1, i64 1> @@ -46,7 +43,7 @@ define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE: # BB#0: ; SSE-NEXT: psrlq $1, %xmm0 ; SSE-NEXT: psrlq $1, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] ; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: pxor %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm2 @@ -63,9 +60,6 @@ define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) { ; AVX: # BB#0: ; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq %sh1 = lshr <2 x i64> %x, <i64 1, i64 1> @@ -79,7 +73,7 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE: # BB#0: ; SSE-NEXT: psrlq $1, %xmm0 ; SSE-NEXT: psrlq $1, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] ; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: pxor %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm2 @@ -98,9 +92,6 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) { ; AVX: # BB#0: ; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 @@ -116,7 +107,7 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE: # BB#0: ; SSE-NEXT: psrlq $1, %xmm0 ; SSE-NEXT: psrlq $1, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0] ; SSE-NEXT: pxor %xmm2, %xmm1 ; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: movdqa %xmm0, %xmm2 @@ -135,9 +126,6 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) { ; AVX: # BB#0: ; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 @@ -153,31 +141,15 @@ define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) { ; SSE: # BB#0: ; SSE-NEXT: psrld $1, %xmm0 ; SSE-NEXT: psrld $1, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; SSE-NEXT: pxor %xmm2, %xmm1 -; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: ugt_v4i32: -; AVX1: # BB#0: -; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ugt_v4i32: -; AVX2: # BB#0: -; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1 -; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 -; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: ugt_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vpsrld $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrld $1, %xmm1, %xmm1 +; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1> %cmp = icmp ugt <4 x i32> %sh1, %sh2 @@ -189,32 +161,16 @@ define <4 x i1> @ult_v4i32(<4 x i32> %x, <4 x i32> %y) { ; SSE: # BB#0: ; SSE-NEXT: psrld $1, %xmm0 ; SSE-NEXT: psrld $1, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; SSE-NEXT: pxor %xmm2, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm2 -; SSE-NEXT: pcmpgtd %xmm0, %xmm2 -; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: pcmpgtd %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: ult_v4i32: -; AVX1: # BB#0: -; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: ult_v4i32: -; AVX2: # BB#0: -; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1 -; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 -; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: ult_v4i32: +; AVX: # BB#0: +; AVX-NEXT: vpsrld $1, %xmm0, %xmm0 +; AVX-NEXT: vpsrld $1, %xmm1, %xmm1 +; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1> %cmp = icmp ult <4 x i32> %sh1, %sh2 @@ -226,12 +182,9 @@ define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) { ; SSE2: # BB#0: ; SSE2-NEXT: psrld $1, %xmm0 ; SSE2-NEXT: psrld $1, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; SSE2-NEXT: pxor %xmm2, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: uge_v4i32: @@ -260,9 +213,6 @@ define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) { ; SSE2: # BB#0: ; SSE2-NEXT: psrld $1, %xmm0 ; SSE2-NEXT: psrld $1, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; SSE2-NEXT: pxor %xmm2, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm0 ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm1, %xmm0 @@ -294,9 +244,6 @@ define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) { ; SSE: # BB#0: ; SSE-NEXT: psrlw $1, %xmm0 ; SSE-NEXT: psrlw $1, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE-NEXT: pxor %xmm2, %xmm1 -; SSE-NEXT: pxor %xmm2, %xmm0 ; SSE-NEXT: pcmpgtw %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -304,9 +251,6 @@ define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) { ; AVX: # BB#0: ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> @@ -320,20 +264,14 @@ define <8 x i1> @ult_v8i16(<8 x i16> %x, <8 x i16> %y) { ; SSE: # BB#0: ; SSE-NEXT: psrlw $1, %xmm0 ; SSE-NEXT: psrlw $1, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE-NEXT: pxor %xmm2, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm2 -; SSE-NEXT: pcmpgtw %xmm0, %xmm2 -; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: pcmpgtw %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: ult_v8i16: ; AVX: # BB#0: ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> @@ -408,22 +346,20 @@ define <16 x i1> @ugt_v16i8(<16 x i8> %x, <16 x i8> %y) { ; SSE-LABEL: ugt_v16i8: ; SSE: # BB#0: ; SSE-NEXT: psrlw $1, %xmm0 -; SSE-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] +; SSE-NEXT: pand %xmm2, %xmm0 ; SSE-NEXT: psrlw $1, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE-NEXT: por %xmm2, %xmm1 -; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: pand %xmm2, %xmm1 ; SSE-NEXT: pcmpgtb %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: ugt_v16i8: ; AVX: # BB#0: ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] +; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> @@ -436,11 +372,10 @@ define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) { ; SSE-LABEL: ult_v16i8: ; SSE: # BB#0: ; SSE-NEXT: psrlw $1, %xmm0 +; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] +; SSE-NEXT: pand %xmm2, %xmm0 ; SSE-NEXT: psrlw $1, %xmm1 -; SSE-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE-NEXT: por %xmm2, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm2 +; SSE-NEXT: pand %xmm1, %xmm2 ; SSE-NEXT: pcmpgtb %xmm0, %xmm2 ; SSE-NEXT: movdqa %xmm2, %xmm0 ; SSE-NEXT: retq @@ -448,11 +383,10 @@ define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) { ; AVX-LABEL: ult_v16i8: ; AVX: # BB#0: ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] +; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; AVX-NEXT: retq %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> diff --git a/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll b/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll index 0eb17fb6c14d..c1d242575253 100644 --- a/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll +++ b/test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll @@ -15,5 +15,5 @@ body: ; PRE-RA-NEXT: - { reg: '%esi', virtual-reg: '%1' } ; POST-RA: liveins: -; POST-RA-NEXT: - { reg: '%edi' } -; POST-RA-NEXT: - { reg: '%esi' } +; POST-RA-NEXT: - { reg: '%edi', virtual-reg: '' } +; POST-RA-NEXT: - { reg: '%esi', virtual-reg: '' } diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll index c9a34de12369..a31adc337906 100644 --- a/test/CodeGen/X86/vshift-1.ll +++ b/test/CodeGen/X86/vshift-1.ll @@ -28,12 +28,9 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind { ; X32-LABEL: shift1b: ; X32: # BB#0: # %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] -; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; X32-NEXT: psllq %xmm2, %xmm0 +; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] +; X32-NEXT: psllq %xmm1, %xmm0 ; X32-NEXT: movdqa %xmm0, (%eax) ; X32-NEXT: retl ; diff --git a/test/CodeGen/X86/vshift-2.ll b/test/CodeGen/X86/vshift-2.ll index 88cba8a4d6ac..a381637b40a9 100644 --- a/test/CodeGen/X86/vshift-2.ll +++ b/test/CodeGen/X86/vshift-2.ll @@ -28,12 +28,9 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind { ; X32-LABEL: shift1b: ; X32: # BB#0: # %entry ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; X32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X32-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] -; X32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; X32-NEXT: psrlq %xmm2, %xmm0 +; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] +; X32-NEXT: psrlq %xmm1, %xmm0 ; X32-NEXT: movdqa %xmm0, (%eax) ; X32-NEXT: retl ; diff --git a/test/CodeGen/X86/x86-interleaved-access.ll b/test/CodeGen/X86/x86-interleaved-access.ll index 4181a374c61c..74214aa1b8b7 100644 --- a/test/CodeGen/X86/x86-interleaved-access.ll +++ b/test/CodeGen/X86/x86-interleaved-access.ll @@ -135,3 +135,96 @@ define <4 x i64> @load_factori64_4(<16 x i64>* %ptr) { %add3 = add <4 x i64> %add2, %strided.v3 ret <4 x i64> %add3 } + +define void @store_factorf64_4(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) { +; AVX-LABEL: store_factorf64_4: +; AVX: # BB#0: +; AVX-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm2[0],xmm3[0] +; AVX-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm4 +; AVX-NEXT: vunpcklpd {{.*#+}} xmm5 = xmm0[0],xmm1[0] +; AVX-NEXT: vblendpd {{.*#+}} ymm4 = ymm5[0,1],ymm4[2,3] +; AVX-NEXT: vunpckhpd {{.*#+}} xmm5 = xmm2[1],xmm3[1] +; AVX-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5 +; AVX-NEXT: vunpckhpd {{.*#+}} xmm6 = xmm0[1],xmm1[1] +; AVX-NEXT: vblendpd {{.*#+}} ymm5 = ymm6[0,1],ymm5[2,3] +; AVX-NEXT: vunpcklpd {{.*#+}} ymm6 = ymm2[0],ymm3[0],ymm2[2],ymm3[2] +; AVX-NEXT: vunpcklpd {{.*#+}} ymm7 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; AVX-NEXT: vextractf128 $1, %ymm7, %xmm7 +; AVX-NEXT: vblendpd {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3] +; AVX-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm3[1],ymm2[3],ymm3[3] +; AVX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3] +; AVX-NEXT: vmovupd %ymm0, 96(%rdi) +; AVX-NEXT: vmovupd %ymm6, 64(%rdi) +; AVX-NEXT: vmovupd %ymm5, 32(%rdi) +; AVX-NEXT: vmovupd %ymm4, (%rdi) +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq + %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> + store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 16 + ret void +} + +define void @store_factori64_4(<16 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) { +; AVX1-LABEL: store_factori64_4: +; AVX1: # BB#0: +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm2[0],xmm3[0] +; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm4 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm0[0],xmm1[0] +; AVX1-NEXT: vblendpd {{.*#+}} ymm4 = ymm5[0,1],ymm4[2,3] +; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm5 = xmm2[1],xmm3[1] +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5 +; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm6 = xmm0[1],xmm1[1] +; AVX1-NEXT: vblendpd {{.*#+}} ymm5 = ymm6[0,1],ymm5[2,3] +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm6 = ymm2[0],ymm3[0],ymm2[2],ymm3[2] +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm7 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm7 +; AVX1-NEXT: vblendpd {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm3[1],ymm2[3],ymm3[3] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3] +; AVX1-NEXT: vmovupd %ymm0, 96(%rdi) +; AVX1-NEXT: vmovupd %ymm6, 64(%rdi) +; AVX1-NEXT: vmovupd %ymm5, 32(%rdi) +; AVX1-NEXT: vmovupd %ymm4, (%rdi) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: store_factori64_4: +; AVX2: # BB#0: +; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm4 = ymm2[0],ymm3[0],ymm2[2],ymm3[2] +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm5 +; AVX2-NEXT: vpermq {{.*#+}} ymm6 = ymm1[0,2,2,3] +; AVX2-NEXT: vpblendd {{.*#+}} xmm5 = xmm5[0,1],xmm6[2,3] +; AVX2-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3],ymm4[4,5,6,7] +; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm5 = ymm2[1],ymm3[1],ymm2[3],ymm3[3] +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm6 +; AVX2-NEXT: vpermq {{.*#+}} ymm7 = ymm0[3,1,2,3] +; AVX2-NEXT: vpblendd {{.*#+}} xmm6 = xmm7[0,1],xmm6[2,3] +; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm6 +; AVX2-NEXT: vpbroadcastq %xmm3, %ymm7 +; AVX2-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm7[6,7] +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm7 = xmm0[0],xmm1[0] +; AVX2-NEXT: vpblendd {{.*#+}} ymm6 = ymm7[0,1,2,3],ymm6[4,5,6,7] +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,1,1,3] +; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm3[6,7] +; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] +; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi) +; AVX2-NEXT: vmovdqu %ymm6, (%rdi) +; AVX2-NEXT: vmovdqu %ymm5, 96(%rdi) +; AVX2-NEXT: vmovdqu %ymm4, 64(%rdi) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq + %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> + store <16 x i64> %interleaved.vec, <16 x i64>* %ptr, align 16 + ret void +} |
