diff options
Diffstat (limited to 'test')
55 files changed, 1620 insertions, 630 deletions
diff --git a/test/Analysis/DemandedBits/basic.ll b/test/Analysis/DemandedBits/basic.ll index 5b8652396b3a..6f44465315e6 100644 --- a/test/Analysis/DemandedBits/basic.ll +++ b/test/Analysis/DemandedBits/basic.ll @@ -1,9 +1,9 @@ ; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s ; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s -; CHECK-DAG: DemandedBits: 0xFF for %1 = add nsw i32 %a, 5 -; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8 -; CHECK-DAG: DemandedBits: 0xFF for %2 = mul nsw i32 %1, %b +; CHECK-DAG: DemandedBits: 0xff for %1 = add nsw i32 %a, 5 +; CHECK-DAG: DemandedBits: 0xff for %3 = trunc i32 %2 to i8 +; CHECK-DAG: DemandedBits: 0xff for %2 = mul nsw i32 %1, %b define i8 @test_mul(i32 %a, i32 %b) { %1 = add nsw i32 %a, 5 %2 = mul nsw i32 %1, %b diff --git a/test/Analysis/DemandedBits/intrinsics.ll b/test/Analysis/DemandedBits/intrinsics.ll index 5a6d17284a72..48f6d4624422 100644 --- a/test/Analysis/DemandedBits/intrinsics.ll +++ b/test/Analysis/DemandedBits/intrinsics.ll @@ -1,9 +1,9 @@ ; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s ; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s -; CHECK-DAG: DemandedBits: 0xFF000000 for %1 = or i32 %x, 1 -; CHECK-DAG: DemandedBits: 0xFF for %2 = call i32 @llvm.bitreverse.i32(i32 %1) -; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8 +; CHECK-DAG: DemandedBits: 0xff000000 for %1 = or i32 %x, 1 +; CHECK-DAG: DemandedBits: 0xff for %2 = call i32 @llvm.bitreverse.i32(i32 %1) +; CHECK-DAG: DemandedBits: 0xff for %3 = trunc i32 %2 to i8 define i8 @test_bswap(i32 %x) { %1 = or i32 %x, 1 %2 = call i32 @llvm.bswap.i32(i32 %1) @@ -12,9 +12,9 @@ define i8 @test_bswap(i32 %x) { } declare i32 @llvm.bswap.i32(i32) -; CHECK-DAG: DemandedBits: 0xFF000000 for %1 = or i32 %x, 1 -; CHECK-DAG: DemandedBits: 0xFF for %2 = call i32 @llvm.bswap.i32(i32 %1) -; CHECK-DAG: DemandedBits: 0xFF for %3 = trunc i32 %2 to i8 +; CHECK-DAG: 
DemandedBits: 0xff000000 for %1 = or i32 %x, 1 +; CHECK-DAG: DemandedBits: 0xff for %2 = call i32 @llvm.bswap.i32(i32 %1) +; CHECK-DAG: DemandedBits: 0xff for %3 = trunc i32 %2 to i8 define i8 @test_bitreverse(i32 %x) { %1 = or i32 %x, 1 %2 = call i32 @llvm.bitreverse.i32(i32 %1) diff --git a/test/Analysis/Lint/noalias-byval.ll b/test/Analysis/Lint/noalias-byval.ll new file mode 100644 index 000000000000..5b36c6d15df3 --- /dev/null +++ b/test/Analysis/Lint/noalias-byval.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -lint -disable-output 2>&1 | FileCheck %s + +%s = type { i8 } + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #0 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) #0 + +declare void @f1(%s* noalias nocapture sret, %s* nocapture readnone) + +define void @f2() { +entry: + %c = alloca %s + %tmp = alloca %s + %0 = bitcast %s* %c to i8* + %1 = bitcast %s* %tmp to i8* + call void @llvm.memset.p0i8.i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false) + call void @f1(%s* sret %c, %s* %c) + ret void +} + +; Lint should complain about us passing %c to both arguments since one of them +; is noalias. +; CHECK: Unusual: noalias argument aliases another argument +; CHECK-NEXT: call void @f1(%s* sret %c, %s* %c) + +declare void @f3(%s* noalias nocapture sret, %s* byval nocapture readnone) + +define void @f4() { +entry: + %c = alloca %s + %tmp = alloca %s + %0 = bitcast %s* %c to i8* + %1 = bitcast %s* %tmp to i8* + call void @llvm.memset.p0i8.i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false) + call void @f3(%s* sret %c, %s* byval %c) + ret void +} + +; Lint should not complain about passing %c to both arguments even if one is +; noalias, since the other one is byval, effectively copying the data to the +; stack instead of passing the pointer itself. 
+; CHECK-NOT: Unusual: noalias argument aliases another argument +; CHECK-NOT: call void @f3(%s* sret %c, %s* %c) + +attributes #0 = { argmemonly nounwind } diff --git a/test/Analysis/ScalarEvolution/truncate.ll b/test/Analysis/ScalarEvolution/truncate.ll new file mode 100644 index 000000000000..e9bd39d7a268 --- /dev/null +++ b/test/Analysis/ScalarEvolution/truncate.ll @@ -0,0 +1,72 @@ +; RUN: opt < %s -analyze -scalar-evolution +; RUN: opt < %s -passes='print<scalar-evolution>' +; Regression test for assert ScalarEvolution::getTruncateExpr. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +define void @snork(i8* %arg, i8 %arg1, i64 %arg2) { +bb: + br label %bb12 + +bb3: ; preds = %bb34 + br i1 true, label %bb4, label %bb12 + +bb4: ; preds = %bb3 + br label %bb6 + +bb5: ; preds = %bb6 + ret void + +bb6: ; preds = %bb6, %bb4 + %tmp = phi i64 [ %tmp28, %bb4 ], [ %tmp10, %bb6 ] + %tmp7 = phi i32 [ 3, %bb4 ], [ %tmp11, %bb6 ] + %tmp8 = trunc i64 %tmp to i32 + %tmp9 = sdiv i32 %tmp8, %tmp7 + %tmp10 = add i64 %tmp, -1 + %tmp11 = add i32 %tmp9, %tmp7 + br i1 true, label %bb5, label %bb6 + +bb12: ; preds = %bb3, %bb + br label %bb13 + +bb13: ; preds = %bb34, %bb12 + %tmp14 = phi i64 [ %arg2, %bb12 ], [ %tmp28, %bb34 ] + %tmp15 = phi i8 [ %arg1, %bb12 ], [ %tmp26, %bb34 ] + %tmp16 = phi i32 [ 1, %bb12 ], [ %tmp35, %bb34 ] + %tmp17 = add i8 %tmp15, -1 + %tmp18 = sext i8 %tmp17 to i64 + %tmp19 = sub i64 1, %tmp14 + %tmp20 = add i64 %tmp19, %tmp18 + %tmp21 = trunc i64 %tmp20 to i32 + %tmp22 = icmp eq i32 %tmp21, 0 + br i1 %tmp22, label %bb32, label %bb23 + +bb23: ; preds = %bb13 + br i1 true, label %bb25, label %bb24 + +bb24: ; preds = %bb23 + br label %bb25 + +bb25: ; preds = %bb24, %bb23 + %tmp26 = add i8 %tmp15, -2 + %tmp27 = sext i8 %tmp26 to i64 + %tmp28 = sub i64 %tmp27, %tmp20 + %tmp29 = trunc i64 %tmp28 to i32 + %tmp30 = icmp eq i32 %tmp29, 0 + br i1 %tmp30, label %bb31, label %bb34 + +bb31: ; preds = 
%bb25 + br label %bb33 + +bb32: ; preds = %bb13 + br label %bb33 + +bb33: ; preds = %bb32, %bb31 + unreachable + +bb34: ; preds = %bb25 + %tmp35 = add nuw nsw i32 %tmp16, 2 + %tmp36 = icmp ugt i32 %tmp16, 52 + br i1 %tmp36, label %bb3, label %bb13 +} diff --git a/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir b/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir index 630b34028162..c9ff2cd0d514 100644 --- a/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir +++ b/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir @@ -1,7 +1,7 @@ -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefix=UNPROFITABLE %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynosm1 -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefix=PROFITABLE %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynosm1 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math %s | FileCheck --check-prefixes=PROFITABLE,ALL %s # name: f1_2s registers: @@ -80,3 +80,82 @@ body: | # PROFITABLE-LABEL: name: f1_2d # PROFITABLE: %5:fpr128 = FNEGv2f64 %2 # PROFITABLE-NEXT: FMLAv2f64 killed %5, %0, %1 
+--- +name: f1_both_fmul_2s +registers: + - { id: 0, class: fpr64 } + - { id: 1, class: fpr64 } + - { id: 2, class: fpr64 } + - { id: 3, class: fpr64 } + - { id: 4, class: fpr64 } + - { id: 5, class: fpr64 } + - { id: 6, class: fpr64 } +body: | + bb.0.entry: + %3:fpr64 = COPY %q3 + %2:fpr64 = COPY %q2 + %1:fpr64 = COPY %q1 + %0:fpr64 = COPY %q0 + %4:fpr64 = FMULv2f32 %0, %1 + %5:fpr64 = FMULv2f32 %2, %3 + %6:fpr64 = FSUBv2f32 killed %4, %5 + %q0 = COPY %6 + RET_ReallyLR implicit %q0 + +... +# ALL-LABEL: name: f1_both_fmul_2s +# ALL: %4:fpr64 = FMULv2f32 %0, %1 +# ALL-NEXT: FMLSv2f32 killed %4, %2, %3 +--- +name: f1_both_fmul_4s +registers: + - { id: 0, class: fpr128 } + - { id: 1, class: fpr128 } + - { id: 2, class: fpr128 } + - { id: 3, class: fpr128 } + - { id: 4, class: fpr128 } + - { id: 5, class: fpr128 } + - { id: 6, class: fpr128 } +body: | + bb.0.entry: + %3:fpr128 = COPY %q3 + %2:fpr128 = COPY %q2 + %1:fpr128 = COPY %q1 + %0:fpr128 = COPY %q0 + %4:fpr128 = FMULv4f32 %0, %1 + %5:fpr128 = FMULv4f32 %2, %3 + %6:fpr128 = FSUBv4f32 killed %4, %5 + %q0 = COPY %6 + RET_ReallyLR implicit %q0 + +... +# ALL-LABEL: name: f1_both_fmul_4s +# ALL: %4:fpr128 = FMULv4f32 %0, %1 +# ALL-NEXT: FMLSv4f32 killed %4, %2, %3 +--- +name: f1_both_fmul_2d +registers: + - { id: 0, class: fpr128 } + - { id: 1, class: fpr128 } + - { id: 2, class: fpr128 } + - { id: 3, class: fpr128 } + - { id: 4, class: fpr128 } + - { id: 5, class: fpr128 } + - { id: 6, class: fpr128 } +body: | + bb.0.entry: + %3:fpr128 = COPY %q3 + %2:fpr128 = COPY %q2 + %1:fpr128 = COPY %q1 + %0:fpr128 = COPY %q0 + %4:fpr128 = FMULv2f64 %0, %1 + %5:fpr128 = FMULv2f64 %2, %3 + %6:fpr128 = FSUBv2f64 killed %4, %5 + %q0 = COPY %6 + RET_ReallyLR implicit %q0 + +... 
+# ALL-LABEL: name: f1_both_fmul_2d +# ALL: %4:fpr128 = FMULv2f64 %0, %1 +# ALL-NEXT: FMLSv2f64 killed %4, %2, %3 + diff --git a/test/CodeGen/AArch64/combine-and-like.ll b/test/CodeGen/AArch64/combine-and-like.ll new file mode 100644 index 000000000000..15770c2e02ff --- /dev/null +++ b/test/CodeGen/AArch64/combine-and-like.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s + +define i32 @f(i32 %a0) { +; CHECK-LABEL: f: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret + %1 = lshr i32 %a0, 2147483647 + %2 = add i32 %1, 2147483647 + %3 = and i32 %2, %1 + ret i32 %3 +} diff --git a/test/CodeGen/X86/avx512-schedule.ll b/test/CodeGen/X86/avx512-schedule.ll index 306b95f0f3ae..af99b86ca5d1 100755 --- a/test/CodeGen/X86/avx512-schedule.ll +++ b/test/CodeGen/X86/avx512-schedule.ll @@ -129,7 +129,7 @@ entry: define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { ; GENERIC-LABEL: imulq512: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: imulq512: @@ -143,7 +143,7 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) { ; GENERIC-LABEL: imulq256: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: imulq256: @@ -157,7 +157,7 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) { define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) { ; GENERIC-LABEL: imulq128: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: imulq128: @@ -550,7 +550,7 @@ 
define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) { ; GENERIC-LABEL: vpmulld_test: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [3:1.00] +; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vpmulld_test: diff --git a/test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir b/test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir index 965014162073..bbefc4f920a1 100644 --- a/test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir +++ b/test/CodeGen/X86/branch_instruction_and_target_split_perf_nops.mir @@ -57,7 +57,7 @@ # return w; # } # -# CHECK: 129: eb 13 jmp 19 <ifElse+0x7E> +# CHECK: 129: eb 13 jmp 19 <ifElse+0x7e> # CHECK: 12e: eb a0 jmp -96 <ifElse+0x10> # CHECK: 132: eb 9c jmp -100 <ifElse+0x10> # CHECK: 137: eb 97 jmp -105 <ifElse+0x10> diff --git a/test/CodeGen/X86/combine-pmuldq.ll b/test/CodeGen/X86/combine-pmuldq.ll index 53ab87a386b3..ebfe0d56358e 100644 --- a/test/CodeGen/X86/combine-pmuldq.ll +++ b/test/CodeGen/X86/combine-pmuldq.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=AVX --check-prefix=AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=AVX --check-prefix=AVX512DQVL ; TODO - shuffle+sext are superfluous define <2 x i64> 
@combine_shuffle_sext_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { @@ -66,13 +69,29 @@ define <2 x i64> @combine_shuffle_zero_pmuludq(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-NEXT: pmuludq %xmm1, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: combine_shuffle_zero_pmuludq: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: retq +; AVX2-LABEL: combine_shuffle_zero_pmuludq: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] +; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: combine_shuffle_zero_pmuludq: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] +; AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq: +; AVX512DQVL: # %bb.0: +; AVX512DQVL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX512DQVL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] +; AVX512DQVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; AVX512DQVL-NEXT: retq %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 7> %2 = shufflevector <4 x i32> %a1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 2, i32 7> %3 = bitcast <4 x i32> %1 to <2 x i64> @@ -94,13 +113,29 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) ; SSE-NEXT: pmuludq %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX-LABEL: combine_shuffle_zero_pmuludq_256: -; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, 
%xmm2 -; AVX-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] -; AVX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] -; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 -; AVX-NEXT: retq +; AVX2-LABEL: combine_shuffle_zero_pmuludq_256: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] +; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: combine_shuffle_zero_pmuludq_256: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] +; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] +; AVX512VL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512DQVL-LABEL: combine_shuffle_zero_pmuludq_256: +; AVX512DQVL: # %bb.0: +; AVX512DQVL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512DQVL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] +; AVX512DQVL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7] +; AVX512DQVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 +; AVX512DQVL-NEXT: retq %1 = shufflevector <8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> %2 = shufflevector <8 x i32> %a1, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15> %3 = bitcast <8 x i32> %1 to <4 x i64> @@ -108,3 +143,46 @@ define <4 x i64> @combine_shuffle_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) %5 = mul <4 x i64> %3, %4 ret <4 x i64> %5 } + +define <8 x i64> @combine_zext_pmuludq_256(<8 x i32> %a) { +; SSE-LABEL: 
combine_zext_pmuludq_256: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] +; SSE-NEXT: pmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; SSE-NEXT: pmovzxdq {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero +; SSE-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero +; SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; SSE-NEXT: movdqa {{.*#+}} xmm1 = [715827883,715827883] +; SSE-NEXT: pmuludq %xmm1, %xmm0 +; SSE-NEXT: pmuludq %xmm1, %xmm2 +; SSE-NEXT: pmuludq %xmm1, %xmm4 +; SSE-NEXT: pmuludq %xmm1, %xmm3 +; SSE-NEXT: movdqa %xmm4, %xmm1 +; SSE-NEXT: retq +; +; AVX2-LABEL: combine_zext_pmuludq_256: +; AVX2: # %bb.0: +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [715827883,715827883,715827883,715827883] +; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpmuludq %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: combine_zext_pmuludq_256: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; AVX512VL-NEXT: vpmuludq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512VL-NEXT: retq +; +; AVX512DQVL-LABEL: combine_zext_pmuludq_256: +; AVX512DQVL: # %bb.0: +; AVX512DQVL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; AVX512DQVL-NEXT: vpmuludq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; AVX512DQVL-NEXT: retq + %1 = zext <8 x i32> %a to <8 x i64> + %2 = mul nuw nsw <8 x i64> %1, <i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883, i64 715827883> + ret <8 x i64> %2 +} diff --git a/test/CodeGen/X86/fdiv-combine.ll 
b/test/CodeGen/X86/fdiv-combine.ll index 912110e75d27..62e86e3ad2cc 100644 --- a/test/CodeGen/X86/fdiv-combine.ll +++ b/test/CodeGen/X86/fdiv-combine.ll @@ -95,6 +95,41 @@ define double @div3_arcp(double %x, double %y, double %z) { ret double %ret } +define float @div_select_constant_fold(i1 zeroext %arg) { +; CHECK-LABEL: div_select_constant_fold: +; CHECK: # %bb.0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB6_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB6_1: +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: retq + %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00 + %B2 = fdiv float %tmp, 1.000000e+00 + ret float %B2 +} + +define float @div_select_constant_fold_zero(i1 zeroext %arg) { +; CHECK-LABEL: div_select_constant_fold_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB7_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: jmp .LBB7_3 +; CHECK-NEXT: .LBB7_1: +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: .LBB7_3: +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: divss %xmm1, %xmm0 +; CHECK-NEXT: retq + %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00 + %B2 = fdiv float %tmp, 0.000000e+00 + ret float %B2 +} + define void @PR24141() { ; CHECK-LABEL: PR24141: ; CHECK: callq diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll index e09ad3e4e0b8..6431847064f0 100644 --- a/test/CodeGen/X86/gather-addresses.ll +++ b/test/CodeGen/X86/gather-addresses.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN ; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN ; RUN: llc -mtriple=i686-win32 -mcpu=nehalem < %s | FileCheck %s 
--check-prefix=LIN32 @@ -7,34 +8,59 @@ ; use an efficient mov/shift sequence rather than shuffling each individual ; element out of the index vector. -; CHECK-LABEL: foo: -; LIN: movdqa (%rsi), %xmm0 -; LIN: pand (%rdx), %xmm0 -; LIN: pextrq $1, %xmm0, %r[[REG4:.+]] -; LIN: movq %xmm0, %r[[REG2:.+]] -; LIN: movslq %e[[REG2]], %r[[REG1:.+]] -; LIN: sarq $32, %r[[REG2]] -; LIN: movslq %e[[REG4]], %r[[REG3:.+]] -; LIN: sarq $32, %r[[REG4]] -; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1 -; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1 -; LIN: movq %rdi, %xmm1 -; LIN: movq %r[[REG3]], %xmm0 - -; WIN: movdqa (%rdx), %xmm0 -; WIN: pand (%r8), %xmm0 -; WIN: pextrq $1, %xmm0, %r[[REG4:.+]] -; WIN: movq %xmm0, %r[[REG2:.+]] -; WIN: movslq %e[[REG2]], %r[[REG1:.+]] -; WIN: sarq $32, %r[[REG2]] -; WIN: movslq %e[[REG4]], %r[[REG3:.+]] -; WIN: sarq $32, %r[[REG4]] -; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1 -; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1 -; WIN: movdqa (%r[[REG2]]), %xmm0 -; WIN: movq %r[[REG2]], %xmm1 - define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { +; LIN-LABEL: foo: +; LIN: # %bb.0: +; LIN-NEXT: movdqa (%rsi), %xmm0 +; LIN-NEXT: pand (%rdx), %xmm0 +; LIN-NEXT: pextrq $1, %xmm0, %rax +; LIN-NEXT: movq %xmm0, %rcx +; LIN-NEXT: movslq %ecx, %rdx +; LIN-NEXT: sarq $32, %rcx +; LIN-NEXT: movslq %eax, %rsi +; LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; LIN-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; LIN-NEXT: sarq $32, %rax +; LIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; LIN-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; LIN-NEXT: retq +; +; WIN-LABEL: foo: +; WIN: # %bb.0: +; WIN-NEXT: movdqa (%rdx), %xmm0 +; WIN-NEXT: pand (%r8), %xmm0 +; WIN-NEXT: pextrq $1, %xmm0, %rax +; WIN-NEXT: movq %xmm0, %rdx +; WIN-NEXT: movslq %edx, %r8 +; WIN-NEXT: sarq $32, %rdx +; WIN-NEXT: movslq %eax, %r9 +; WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; WIN-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; WIN-NEXT: sarq $32, %rax +; WIN-NEXT: movsd {{.*#+}} xmm1 = 
mem[0],zero +; WIN-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; WIN-NEXT: retq +; +; LIN32-LABEL: foo: +; LIN32: # %bb.0: +; LIN32-NEXT: pushl %edi +; LIN32-NEXT: pushl %esi +; LIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; LIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; LIN32-NEXT: movdqa (%edx), %xmm0 +; LIN32-NEXT: pand (%ecx), %xmm0 +; LIN32-NEXT: pextrd $1, %xmm0, %ecx +; LIN32-NEXT: pextrd $2, %xmm0, %edx +; LIN32-NEXT: pextrd $3, %xmm0, %esi +; LIN32-NEXT: movd %xmm0, %edi +; LIN32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; LIN32-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; LIN32-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; LIN32-NEXT: popl %esi +; LIN32-NEXT: popl %edi +; LIN32-NEXT: retl %a = load <4 x i32>, <4 x i32>* %i %b = load <4 x i32>, <4 x i32>* %h %j = and <4 x i32> %a, %b @@ -60,13 +86,81 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { ; Check that the sequence previously used above, which bounces the vector off the ; cache works for x86-32. Note that in this case it will not be used for index ; calculation, since indexes are 32-bit, not 64. 
-; CHECK-LABEL: old: -; LIN32: movaps %xmm0, (%esp) -; LIN32-DAG: {{(mov|and)}}l (%esp), -; LIN32-DAG: {{(mov|and)}}l 4(%esp), -; LIN32-DAG: {{(mov|and)}}l 8(%esp), -; LIN32-DAG: {{(mov|and)}}l 12(%esp), define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind { +; LIN-LABEL: old: +; LIN: # %bb.0: +; LIN-NEXT: movdqa (%rsi), %xmm0 +; LIN-NEXT: pand (%rdx), %xmm0 +; LIN-NEXT: pextrq $1, %xmm0, %rax +; LIN-NEXT: movq %rax, %rdx +; LIN-NEXT: shrq $32, %rdx +; LIN-NEXT: movq %xmm0, %rsi +; LIN-NEXT: movq %rsi, %rdi +; LIN-NEXT: shrq $32, %rdi +; LIN-NEXT: andl %ecx, %esi +; LIN-NEXT: andl %ecx, %eax +; LIN-NEXT: andq %rcx, %rdi +; LIN-NEXT: andq %rcx, %rdx +; LIN-NEXT: movq %rdi, %xmm1 +; LIN-NEXT: movq %rsi, %xmm0 +; LIN-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; LIN-NEXT: movq %rdx, %xmm2 +; LIN-NEXT: movq %rax, %xmm1 +; LIN-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; LIN-NEXT: retq +; +; WIN-LABEL: old: +; WIN: # %bb.0: +; WIN-NEXT: movdqa (%rdx), %xmm0 +; WIN-NEXT: pand (%r8), %xmm0 +; WIN-NEXT: pextrq $1, %xmm0, %r8 +; WIN-NEXT: movq %r8, %rcx +; WIN-NEXT: shrq $32, %rcx +; WIN-NEXT: movq %xmm0, %rax +; WIN-NEXT: movq %rax, %rdx +; WIN-NEXT: shrq $32, %rdx +; WIN-NEXT: andl %r9d, %eax +; WIN-NEXT: andl %r9d, %r8d +; WIN-NEXT: andq %r9, %rdx +; WIN-NEXT: andq %r9, %rcx +; WIN-NEXT: movq %rdx, %xmm1 +; WIN-NEXT: movq %rax, %xmm0 +; WIN-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; WIN-NEXT: movq %rcx, %xmm2 +; WIN-NEXT: movq %r8, %xmm1 +; WIN-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; WIN-NEXT: retq +; +; LIN32-LABEL: old: +; LIN32: # %bb.0: +; LIN32-NEXT: pushl %ebp +; LIN32-NEXT: movl %esp, %ebp +; LIN32-NEXT: pushl %esi +; LIN32-NEXT: andl $-16, %esp +; LIN32-NEXT: subl $32, %esp +; LIN32-NEXT: movl 20(%ebp), %eax +; LIN32-NEXT: movl 16(%ebp), %ecx +; LIN32-NEXT: movl 12(%ebp), %edx +; LIN32-NEXT: movaps (%edx), %xmm0 +; LIN32-NEXT: andps (%ecx), %xmm0 +; LIN32-NEXT: movaps %xmm0, (%esp) +; LIN32-NEXT: movl 
(%esp), %ecx +; LIN32-NEXT: andl %eax, %ecx +; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; LIN32-NEXT: andl %eax, %edx +; LIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; LIN32-NEXT: andl %eax, %esi +; LIN32-NEXT: andl {{[0-9]+}}(%esp), %eax +; LIN32-NEXT: movd %edx, %xmm1 +; LIN32-NEXT: movd %ecx, %xmm0 +; LIN32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; LIN32-NEXT: movd %eax, %xmm2 +; LIN32-NEXT: movd %esi, %xmm1 +; LIN32-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; LIN32-NEXT: leal -4(%ebp), %esp +; LIN32-NEXT: popl %esi +; LIN32-NEXT: popl %ebp +; LIN32-NEXT: retl %a = load <4 x i32>, <4 x i32>* %i %b = load <4 x i32>, <4 x i32>* %h %j = and <4 x i32> %a, %b @@ -77,7 +171,7 @@ define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind %q0 = zext i32 %d0 to i64 %q1 = zext i32 %d1 to i64 %q2 = zext i32 %d2 to i64 - %q3 = zext i32 %d3 to i64 + %q3 = zext i32 %d3 to i64 %r0 = and i64 %q0, %f %r1 = and i64 %q1, %f %r2 = and i64 %q2, %f diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll index d318dde34434..d3521ca9f1e3 100644 --- a/test/CodeGen/X86/masked_gather_scatter.ll +++ b/test/CodeGen/X86/masked_gather_scatter.ll @@ -497,7 +497,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) { ; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2 ; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1 -; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1 +; SKX_SMALL-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1 ; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 @@ -510,7 +510,7 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) { ; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2 ; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1 ; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax -; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, 
%zmm1 +; SKX_LARGE-NEXT: vpmuldq (%rax){1to8}, %zmm1, %zmm1 ; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax ; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0 ; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0 @@ -582,7 +582,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) { ; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2 ; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1 -; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1 +; SKX_SMALL-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1 ; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 @@ -595,7 +595,7 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) { ; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2 ; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1 ; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax -; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1 +; SKX_LARGE-NEXT: vpmuldq (%rax){1to8}, %zmm1, %zmm1 ; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax ; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0 ; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0 diff --git a/test/CodeGen/X86/setcc-combine.ll b/test/CodeGen/X86/setcc-combine.ll index a4a8e67d742c..56cff4ab6f2f 100644 --- a/test/CodeGen/X86/setcc-combine.ll +++ b/test/CodeGen/X86/setcc-combine.ll @@ -183,3 +183,27 @@ define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) { ret i32 %t1 } +; (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2) +; Don't combine with i1 - out of range constant +define void @test_i1_uge(i1 *%A2) { +; CHECK-LABEL: test_i1_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: movb (%rdi), %al +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: xorb $1, %cl +; CHECK-NEXT: andb %cl, %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: negq %rax +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movb %cl, (%rdi,%rax) +; CHECK-NEXT: retq + %L5 = load i1, i1* %A2 + 
%C3 = icmp ne i1 %L5, true + %C8 = icmp eq i1 %L5, false + %C9 = icmp ugt i1 %C3, %C8 + %G3 = getelementptr i1, i1* %A2, i1 %C9 + store i1 %C3, i1* %G3 + ret void +} + diff --git a/test/CodeGen/X86/shrink_vmul.ll b/test/CodeGen/X86/shrink_vmul.ll index a516c709517d..ced3a40e4a46 100644 --- a/test/CodeGen/X86/shrink_vmul.ll +++ b/test/CodeGen/X86/shrink_vmul.ll @@ -112,13 +112,14 @@ define void @mul_4xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE-NEXT: movl c, %esi ; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: pxor %xmm2, %xmm2 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; X86-SSE-NEXT: pmullw %xmm0, %xmm1 -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; X86-SSE-NEXT: movdqu %xmm1, (%esi,%ecx,4) +; X86-SSE-NEXT: pxor %xmm1, %xmm1 +; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; X86-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X86-SSE-NEXT: pmaddwd %xmm0, %xmm2 +; X86-SSE-NEXT: movdqu %xmm2, (%esi,%ecx,4) ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: 
retl ; @@ -142,13 +143,14 @@ define void @mul_4xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 ; X64-SSE: # %bb.0: # %entry ; X64-SSE-NEXT: movq {{.*}}(%rip), %rax ; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64-SSE-NEXT: pxor %xmm2, %xmm2 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; X64-SSE-NEXT: pmullw %xmm0, %xmm1 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] -; X64-SSE-NEXT: movdqu %xmm1, (%rax,%rdx,4) +; X64-SSE-NEXT: pxor %xmm1, %xmm1 +; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; X64-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X64-SSE-NEXT: pmaddwd %xmm0, %xmm2 +; X64-SSE-NEXT: movdqu %xmm2, (%rax,%rdx,4) ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: mul_4xi8: @@ -2215,13 +2217,7 @@ define void @PR34947() { ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl (%eax) ; X86-SSE-NEXT: movd %edx, %xmm0 -; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199] -; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] -; X86-SSE-NEXT: pmuludq %xmm2, %xmm1 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; 
X86-SSE-NEXT: pmuludq %xmm2, %xmm3 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE-NEXT: pmaddwd {{\.LCPI.*}}, %xmm1 ; X86-SSE-NEXT: movl $8199, %eax # imm = 0x2007 ; X86-SSE-NEXT: movd %eax, %xmm2 ; X86-SSE-NEXT: pmuludq %xmm0, %xmm2 @@ -2415,13 +2411,7 @@ define void @PR34947() { ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl (%rax) ; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199] -; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] -; X64-SSE-NEXT: pmuludq %xmm2, %xmm1 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; X64-SSE-NEXT: pmuludq %xmm2, %xmm3 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X64-SSE-NEXT: pmaddwd {{.*}}(%rip), %xmm1 ; X64-SSE-NEXT: movl $8199, %eax # imm = 0x2007 ; X64-SSE-NEXT: movd %eax, %xmm2 ; X64-SSE-NEXT: pmuludq %xmm0, %xmm2 diff --git a/test/CodeGen/X86/slow-pmulld.ll b/test/CodeGen/X86/slow-pmulld.ll index 4d73b11349f5..325e6ee4085a 100644 --- a/test/CodeGen/X86/slow-pmulld.ll +++ b/test/CodeGen/X86/slow-pmulld.ll @@ -10,22 +10,14 @@ define <4 x i32> @foo(<4 x i8> %A) { ; CHECK32-LABEL: foo: ; CHECK32: # %bb.0: -; CHECK32-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[4],zero,xmm0[8],zero,xmm0[12],zero,xmm0[u,u,u,u,u,u,u,u] -; CHECK32-NEXT: movdqa {{.*#+}} xmm1 = <18778,18778,18778,18778,u,u,u,u> -; CHECK32-NEXT: movdqa %xmm0, %xmm2 -; CHECK32-NEXT: pmullw %xmm1, %xmm0 -; CHECK32-NEXT: pmulhw %xmm1, %xmm2 -; CHECK32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; CHECK32-NEXT: pand {{\.LCPI.*}}, %xmm0 +; CHECK32-NEXT: pmaddwd {{\.LCPI.*}}, %xmm0 ; CHECK32-NEXT: retl ; ; CHECK64-LABEL: foo: ; CHECK64: # %bb.0: -; CHECK64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[4],zero,xmm0[8],zero,xmm0[12],zero,xmm0[u,u,u,u,u,u,u,u] -; CHECK64-NEXT: movdqa 
{{.*#+}} xmm1 = <18778,18778,18778,18778,u,u,u,u> -; CHECK64-NEXT: movdqa %xmm0, %xmm2 -; CHECK64-NEXT: pmullw %xmm1, %xmm0 -; CHECK64-NEXT: pmulhw %xmm1, %xmm2 -; CHECK64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; CHECK64-NEXT: pand {{.*}}(%rip), %xmm0 +; CHECK64-NEXT: pmaddwd {{.*}}(%rip), %xmm0 ; CHECK64-NEXT: retq ; ; SSE4-32-LABEL: foo: diff --git a/test/CodeGen/X86/sse2-schedule.ll b/test/CodeGen/X86/sse2-schedule.ll index ad2edfe0959e..a789b861b7aa 100644 --- a/test/CodeGen/X86/sse2-schedule.ll +++ b/test/CodeGen/X86/sse2-schedule.ll @@ -5624,16 +5624,8 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; ATOM-LABEL: test_pmaddwd: ; ATOM: # %bb.0: -; ATOM-NEXT: pmaddwd %xmm1, %xmm0 -; ATOM-NEXT: pmaddwd (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:5.00] +; ATOM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [5:5.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_pmaddwd: @@ -6241,16 +6233,8 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; ATOM-LABEL: test_pmuludq: ; ATOM: # %bb.0: -; ATOM-NEXT: pmuludq %xmm1, %xmm0 -; ATOM-NEXT: pmuludq (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:5.00] +; ATOM-NEXT: pmuludq (%rdi), %xmm0 # sched: [5:5.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_pmuludq: @@ -6394,12 +6378,8 @@ define <2 
x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; ATOM-LABEL: test_psadbw: ; ATOM: # %bb.0: -; ATOM-NEXT: psadbw %xmm1, %xmm0 -; ATOM-NEXT: psadbw (%rdi), %xmm0 -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: psadbw %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: psadbw (%rdi), %xmm0 # sched: [1:1.00] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] diff --git a/test/MC/ARM/branch-disassemble.s b/test/MC/ARM/branch-disassemble.s index 4df40e05e8c9..5380a1d0b9e4 100644 --- a/test/MC/ARM/branch-disassemble.s +++ b/test/MC/ARM/branch-disassemble.s @@ -7,8 +7,8 @@ @ RUN: | FileCheck %s -check-prefix CHECK-THUMB b.w .Lbranch -@ CHECK-ARM: b #4 <$a.0+0xC> -@ CHECK-THUMB: b.w #8 <$t.0+0xC> +@ CHECK-ARM: b #4 <$a.0+0xc> +@ CHECK-THUMB: b.w #8 <$t.0+0xc> adds r0, r1, #42 adds r1, r2, #42 .Lbranch: diff --git a/test/MC/ELF/comdat-declaration-errors.s b/test/MC/ELF/comdat-declaration-errors.s new file mode 100644 index 000000000000..fade8cfe41dd --- /dev/null +++ b/test/MC/ELF/comdat-declaration-errors.s @@ -0,0 +1,14 @@ +// RUN: not llvm-mc -triple x86_64-pc-linux-gnu %s \ +// RUN: -filetype=obj -o %t.o 2>&1 | FileCheck %s + +// Check we error out on incorrect COMDAT declarations +// and not just silently ignore them. 
+ +// CHECK: error: invalid group name +// CHECK-NEXT: .section .foo,"G",@progbits,-abc,comdat + +// CHECK: error: invalid linkage +// CHECK-NEXT: .section .bar,"G",@progbits,abc,-comdat + +.section .foo,"G",@progbits,-abc,comdat +.section .bar,"G",@progbits,abc,-comdat diff --git a/test/MC/X86/PREFETCH-32.s b/test/MC/X86/PREFETCH-32.s new file mode 100644 index 000000000000..caec44ea386c --- /dev/null +++ b/test/MC/X86/PREFETCH-32.s @@ -0,0 +1,169 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: prefetch -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x0d,0x84,0x82,0x10,0xe3,0x0f,0xe3] +prefetch -485498096(%edx,%eax,4) + +// CHECK: prefetch 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x0d,0x84,0x82,0xf0,0x1c,0xf0,0x1c] +prefetch 485498096(%edx,%eax,4) + +// CHECK: prefetch 485498096(%edx) +// CHECK: encoding: [0x0f,0x0d,0x82,0xf0,0x1c,0xf0,0x1c] +prefetch 485498096(%edx) + +// CHECK: prefetch 485498096 +// CHECK: encoding: [0x0f,0x0d,0x05,0xf0,0x1c,0xf0,0x1c] +prefetch 485498096 + +// CHECK: prefetch 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x0d,0x44,0x02,0x40] +prefetch 64(%edx,%eax) + +// CHECK: prefetch (%edx) +// CHECK: encoding: [0x0f,0x0d,0x02] +prefetch (%edx) + +// CHECK: prefetchnta -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x84,0x82,0x10,0xe3,0x0f,0xe3] +prefetchnta -485498096(%edx,%eax,4) + +// CHECK: prefetchnta 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x84,0x82,0xf0,0x1c,0xf0,0x1c] +prefetchnta 485498096(%edx,%eax,4) + +// CHECK: prefetchnta 485498096(%edx) +// CHECK: encoding: [0x0f,0x18,0x82,0xf0,0x1c,0xf0,0x1c] +prefetchnta 485498096(%edx) + +// CHECK: prefetchnta 485498096 +// CHECK: encoding: [0x0f,0x18,0x05,0xf0,0x1c,0xf0,0x1c] +prefetchnta 485498096 + +// CHECK: prefetchnta 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x18,0x44,0x02,0x40] +prefetchnta 64(%edx,%eax) + +// CHECK: prefetchnta (%edx) +// CHECK: encoding: [0x0f,0x18,0x02] +prefetchnta (%edx) + +// 
CHECK: prefetcht0 -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x8c,0x82,0x10,0xe3,0x0f,0xe3] +prefetcht0 -485498096(%edx,%eax,4) + +// CHECK: prefetcht0 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x8c,0x82,0xf0,0x1c,0xf0,0x1c] +prefetcht0 485498096(%edx,%eax,4) + +// CHECK: prefetcht0 485498096(%edx) +// CHECK: encoding: [0x0f,0x18,0x8a,0xf0,0x1c,0xf0,0x1c] +prefetcht0 485498096(%edx) + +// CHECK: prefetcht0 485498096 +// CHECK: encoding: [0x0f,0x18,0x0d,0xf0,0x1c,0xf0,0x1c] +prefetcht0 485498096 + +// CHECK: prefetcht0 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x18,0x4c,0x02,0x40] +prefetcht0 64(%edx,%eax) + +// CHECK: prefetcht0 (%edx) +// CHECK: encoding: [0x0f,0x18,0x0a] +prefetcht0 (%edx) + +// CHECK: prefetcht1 -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x94,0x82,0x10,0xe3,0x0f,0xe3] +prefetcht1 -485498096(%edx,%eax,4) + +// CHECK: prefetcht1 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x94,0x82,0xf0,0x1c,0xf0,0x1c] +prefetcht1 485498096(%edx,%eax,4) + +// CHECK: prefetcht1 485498096(%edx) +// CHECK: encoding: [0x0f,0x18,0x92,0xf0,0x1c,0xf0,0x1c] +prefetcht1 485498096(%edx) + +// CHECK: prefetcht1 485498096 +// CHECK: encoding: [0x0f,0x18,0x15,0xf0,0x1c,0xf0,0x1c] +prefetcht1 485498096 + +// CHECK: prefetcht1 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x18,0x54,0x02,0x40] +prefetcht1 64(%edx,%eax) + +// CHECK: prefetcht1 (%edx) +// CHECK: encoding: [0x0f,0x18,0x12] +prefetcht1 (%edx) + +// CHECK: prefetcht2 -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x9c,0x82,0x10,0xe3,0x0f,0xe3] +prefetcht2 -485498096(%edx,%eax,4) + +// CHECK: prefetcht2 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x18,0x9c,0x82,0xf0,0x1c,0xf0,0x1c] +prefetcht2 485498096(%edx,%eax,4) + +// CHECK: prefetcht2 485498096(%edx) +// CHECK: encoding: [0x0f,0x18,0x9a,0xf0,0x1c,0xf0,0x1c] +prefetcht2 485498096(%edx) + +// CHECK: prefetcht2 485498096 +// CHECK: encoding: [0x0f,0x18,0x1d,0xf0,0x1c,0xf0,0x1c] +prefetcht2 485498096 + +// 
CHECK: prefetcht2 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x18,0x5c,0x02,0x40] +prefetcht2 64(%edx,%eax) + +// CHECK: prefetcht2 (%edx) +// CHECK: encoding: [0x0f,0x18,0x1a] +prefetcht2 (%edx) + +// CHECK: prefetchw -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x0d,0x8c,0x82,0x10,0xe3,0x0f,0xe3] +prefetchw -485498096(%edx,%eax,4) + +// CHECK: prefetchw 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x0d,0x8c,0x82,0xf0,0x1c,0xf0,0x1c] +prefetchw 485498096(%edx,%eax,4) + +// CHECK: prefetchw 485498096(%edx) +// CHECK: encoding: [0x0f,0x0d,0x8a,0xf0,0x1c,0xf0,0x1c] +prefetchw 485498096(%edx) + +// CHECK: prefetchw 485498096 +// CHECK: encoding: [0x0f,0x0d,0x0d,0xf0,0x1c,0xf0,0x1c] +prefetchw 485498096 + +// CHECK: prefetchw 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x0d,0x4c,0x02,0x40] +prefetchw 64(%edx,%eax) + +// CHECK: prefetchw (%edx) +// CHECK: encoding: [0x0f,0x0d,0x0a] +prefetchw (%edx) + +// CHECK: prefetchwt1 -485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x0d,0x94,0x82,0x10,0xe3,0x0f,0xe3] +prefetchwt1 -485498096(%edx,%eax,4) + +// CHECK: prefetchwt1 485498096(%edx,%eax,4) +// CHECK: encoding: [0x0f,0x0d,0x94,0x82,0xf0,0x1c,0xf0,0x1c] +prefetchwt1 485498096(%edx,%eax,4) + +// CHECK: prefetchwt1 485498096(%edx) +// CHECK: encoding: [0x0f,0x0d,0x92,0xf0,0x1c,0xf0,0x1c] +prefetchwt1 485498096(%edx) + +// CHECK: prefetchwt1 485498096 +// CHECK: encoding: [0x0f,0x0d,0x15,0xf0,0x1c,0xf0,0x1c] +prefetchwt1 485498096 + +// CHECK: prefetchwt1 64(%edx,%eax) +// CHECK: encoding: [0x0f,0x0d,0x54,0x02,0x40] +prefetchwt1 64(%edx,%eax) + +// CHECK: prefetchwt1 (%edx) +// CHECK: encoding: [0x0f,0x0d,0x12] +prefetchwt1 (%edx) diff --git a/test/MC/X86/PREFETCH-64.s b/test/MC/X86/PREFETCH-64.s new file mode 100644 index 000000000000..0c4a126a2a20 --- /dev/null +++ b/test/MC/X86/PREFETCH-64.s @@ -0,0 +1,170 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: prefetch 485498096 +// CHECK: encoding: 
[0x0f,0x0d,0x04,0x25,0xf0,0x1c,0xf0,0x1c] +prefetch 485498096 + +// CHECK: prefetch 64(%rdx) +// CHECK: encoding: [0x0f,0x0d,0x42,0x40] +prefetch 64(%rdx) + +// CHECK: prefetch 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x0d,0x44,0x82,0x40] +prefetch 64(%rdx,%rax,4) + +// CHECK: prefetch -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x0d,0x44,0x82,0xc0] +prefetch -64(%rdx,%rax,4) + +// CHECK: prefetch 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x0d,0x44,0x02,0x40] +prefetch 64(%rdx,%rax) + +// CHECK: prefetchnta 485498096 +// CHECK: encoding: [0x0f,0x18,0x04,0x25,0xf0,0x1c,0xf0,0x1c] +prefetchnta 485498096 + +// CHECK: prefetchnta 64(%rdx) +// CHECK: encoding: [0x0f,0x18,0x42,0x40] +prefetchnta 64(%rdx) + +// CHECK: prefetchnta 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x44,0x82,0x40] +prefetchnta 64(%rdx,%rax,4) + +// CHECK: prefetchnta -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x44,0x82,0xc0] +prefetchnta -64(%rdx,%rax,4) + +// CHECK: prefetchnta 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x18,0x44,0x02,0x40] +prefetchnta 64(%rdx,%rax) + +// CHECK: prefetchnta (%rdx) +// CHECK: encoding: [0x0f,0x18,0x02] +prefetchnta (%rdx) + +// CHECK: prefetch (%rdx) +// CHECK: encoding: [0x0f,0x0d,0x02] +prefetch (%rdx) + +// CHECK: prefetcht0 485498096 +// CHECK: encoding: [0x0f,0x18,0x0c,0x25,0xf0,0x1c,0xf0,0x1c] +prefetcht0 485498096 + +// CHECK: prefetcht0 64(%rdx) +// CHECK: encoding: [0x0f,0x18,0x4a,0x40] +prefetcht0 64(%rdx) + +// CHECK: prefetcht0 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x4c,0x82,0x40] +prefetcht0 64(%rdx,%rax,4) + +// CHECK: prefetcht0 -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x4c,0x82,0xc0] +prefetcht0 -64(%rdx,%rax,4) + +// CHECK: prefetcht0 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x18,0x4c,0x02,0x40] +prefetcht0 64(%rdx,%rax) + +// CHECK: prefetcht0 (%rdx) +// CHECK: encoding: [0x0f,0x18,0x0a] +prefetcht0 (%rdx) + +// CHECK: prefetcht1 485498096 +// CHECK: encoding: [0x0f,0x18,0x14,0x25,0xf0,0x1c,0xf0,0x1c] +prefetcht1 
485498096 + +// CHECK: prefetcht1 64(%rdx) +// CHECK: encoding: [0x0f,0x18,0x52,0x40] +prefetcht1 64(%rdx) + +// CHECK: prefetcht1 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x54,0x82,0x40] +prefetcht1 64(%rdx,%rax,4) + +// CHECK: prefetcht1 -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x54,0x82,0xc0] +prefetcht1 -64(%rdx,%rax,4) + +// CHECK: prefetcht1 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x18,0x54,0x02,0x40] +prefetcht1 64(%rdx,%rax) + +// CHECK: prefetcht1 (%rdx) +// CHECK: encoding: [0x0f,0x18,0x12] +prefetcht1 (%rdx) + +// CHECK: prefetcht2 485498096 +// CHECK: encoding: [0x0f,0x18,0x1c,0x25,0xf0,0x1c,0xf0,0x1c] +prefetcht2 485498096 + +// CHECK: prefetcht2 64(%rdx) +// CHECK: encoding: [0x0f,0x18,0x5a,0x40] +prefetcht2 64(%rdx) + +// CHECK: prefetcht2 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x5c,0x82,0x40] +prefetcht2 64(%rdx,%rax,4) + +// CHECK: prefetcht2 -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x18,0x5c,0x82,0xc0] +prefetcht2 -64(%rdx,%rax,4) + +// CHECK: prefetcht2 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x18,0x5c,0x02,0x40] +prefetcht2 64(%rdx,%rax) + +// CHECK: prefetcht2 (%rdx) +// CHECK: encoding: [0x0f,0x18,0x1a] +prefetcht2 (%rdx) + +// CHECK: prefetchw 485498096 +// CHECK: encoding: [0x0f,0x0d,0x0c,0x25,0xf0,0x1c,0xf0,0x1c] +prefetchw 485498096 + +// CHECK: prefetchw 64(%rdx) +// CHECK: encoding: [0x0f,0x0d,0x4a,0x40] +prefetchw 64(%rdx) + +// CHECK: prefetchw 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x0d,0x4c,0x82,0x40] +prefetchw 64(%rdx,%rax,4) + +// CHECK: prefetchw -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x0d,0x4c,0x82,0xc0] +prefetchw -64(%rdx,%rax,4) + +// CHECK: prefetchw 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x0d,0x4c,0x02,0x40] +prefetchw 64(%rdx,%rax) + +// CHECK: prefetchw (%rdx) +// CHECK: encoding: [0x0f,0x0d,0x0a] +prefetchw (%rdx) + +// CHECK: prefetchwt1 485498096 +// CHECK: encoding: [0x0f,0x0d,0x14,0x25,0xf0,0x1c,0xf0,0x1c] +prefetchwt1 485498096 + +// CHECK: prefetchwt1 64(%rdx) +// CHECK: 
encoding: [0x0f,0x0d,0x52,0x40] +prefetchwt1 64(%rdx) + +// CHECK: prefetchwt1 64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x0d,0x54,0x82,0x40] +prefetchwt1 64(%rdx,%rax,4) + +// CHECK: prefetchwt1 -64(%rdx,%rax,4) +// CHECK: encoding: [0x0f,0x0d,0x54,0x82,0xc0] +prefetchwt1 -64(%rdx,%rax,4) + +// CHECK: prefetchwt1 64(%rdx,%rax) +// CHECK: encoding: [0x0f,0x0d,0x54,0x02,0x40] +prefetchwt1 64(%rdx,%rax) + +// CHECK: prefetchwt1 (%rdx) +// CHECK: encoding: [0x0f,0x0d,0x12] +prefetchwt1 (%rdx) + diff --git a/test/MC/X86/RDPMC-32.s b/test/MC/X86/RDPMC-32.s new file mode 100644 index 000000000000..5168af3a62c1 --- /dev/null +++ b/test/MC/X86/RDPMC-32.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdpmc +// CHECK: encoding: [0x0f,0x33] +rdpmc + diff --git a/test/MC/X86/RDPMC-64.s b/test/MC/X86/RDPMC-64.s new file mode 100644 index 000000000000..56fa3d9fa828 --- /dev/null +++ b/test/MC/X86/RDPMC-64.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdpmc +// CHECK: encoding: [0x0f,0x33] +rdpmc + diff --git a/test/MC/X86/RDRAND-32.s b/test/MC/X86/RDRAND-32.s new file mode 100644 index 000000000000..5461ca74eabb --- /dev/null +++ b/test/MC/X86/RDRAND-32.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdrandl %eax +// CHECK: encoding: [0x0f,0xc7,0xf0] +rdrandl %eax + diff --git a/test/MC/X86/RDRAND-64.s b/test/MC/X86/RDRAND-64.s new file mode 100644 index 000000000000..3482c0ecd5c3 --- /dev/null +++ b/test/MC/X86/RDRAND-64.s @@ -0,0 +1,14 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdrandl %r13d +// CHECK: encoding: [0x41,0x0f,0xc7,0xf5] +rdrandl %r13d + +// CHECK: rdrandq %r13 +// CHECK: encoding: [0x49,0x0f,0xc7,0xf5] +rdrandq %r13 + +// CHECK: rdrandw %r13w +// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xf5] +rdrandw %r13w + diff --git 
a/test/MC/X86/RDSEED-32.s b/test/MC/X86/RDSEED-32.s new file mode 100644 index 000000000000..87be0d502a66 --- /dev/null +++ b/test/MC/X86/RDSEED-32.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdseedl %eax +// CHECK: encoding: [0x0f,0xc7,0xf8] +rdseedl %eax + diff --git a/test/MC/X86/RDSEED-64.s b/test/MC/X86/RDSEED-64.s new file mode 100644 index 000000000000..0d710ceaa5bb --- /dev/null +++ b/test/MC/X86/RDSEED-64.s @@ -0,0 +1,14 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdseedl %r13d +// CHECK: encoding: [0x41,0x0f,0xc7,0xfd] +rdseedl %r13d + +// CHECK: rdseedq %r13 +// CHECK: encoding: [0x49,0x0f,0xc7,0xfd] +rdseedq %r13 + +// CHECK: rdseedw %r13w +// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xfd] +rdseedw %r13w + diff --git a/test/MC/X86/RDTSCP-32.s b/test/MC/X86/RDTSCP-32.s new file mode 100644 index 000000000000..48232edf7d52 --- /dev/null +++ b/test/MC/X86/RDTSCP-32.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdtscp +// CHECK: encoding: [0x0f,0x01,0xf9] +rdtscp + diff --git a/test/MC/X86/RDTSCP-64.s b/test/MC/X86/RDTSCP-64.s new file mode 100644 index 000000000000..045fd49a2738 --- /dev/null +++ b/test/MC/X86/RDTSCP-64.s @@ -0,0 +1,6 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdtscp +// CHECK: encoding: [0x0f,0x01,0xf9] +rdtscp + diff --git a/test/MC/X86/RDWRFSGS-64.s b/test/MC/X86/RDWRFSGS-64.s new file mode 100644 index 000000000000..47314caf867d --- /dev/null +++ b/test/MC/X86/RDWRFSGS-64.s @@ -0,0 +1,34 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdfsbasel %r13d +// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xc5] +rdfsbasel %r13d + +// CHECK: rdfsbaseq %r13 +// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xc5] +rdfsbaseq %r13 + +// CHECK: rdgsbasel %r13d +// CHECK: encoding: 
[0xf3,0x41,0x0f,0xae,0xcd] +rdgsbasel %r13d + +// CHECK: rdgsbaseq %r13 +// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xcd] +rdgsbaseq %r13 + +// CHECK: wrfsbasel %r13d +// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xd5] +wrfsbasel %r13d + +// CHECK: wrfsbaseq %r13 +// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xd5] +wrfsbaseq %r13 + +// CHECK: wrgsbasel %r13d +// CHECK: encoding: [0xf3,0x41,0x0f,0xae,0xdd] +wrgsbasel %r13d + +// CHECK: wrgsbaseq %r13 +// CHECK: encoding: [0xf3,0x49,0x0f,0xae,0xdd] +wrgsbaseq %r13 + diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s index b1e89cde9796..23846d921a8c 100644 --- a/test/MC/X86/x86-64.s +++ b/test/MC/X86/x86-64.s @@ -99,6 +99,10 @@ // CHECK: shll $2, %eax sall $2, %eax +// CHECK: rep movsb +rep # comment +movsb + // CHECK: rep // CHECK: insb rep;insb diff --git a/test/MC/X86/x86_64-asm-match.s b/test/MC/X86/x86_64-asm-match.s new file mode 100644 index 000000000000..3208e4f4e0f0 --- /dev/null +++ b/test/MC/X86/x86_64-asm-match.s @@ -0,0 +1,52 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -debug-only=asm-matcher %s 2>&1 | FileCheck %s +// REQUIRES: asserts + +// CHECK: AsmMatcher: found 4 encodings with mnemonic 'pshufb' +// CHECK:Trying to match opcode MMX_PSHUFBrr64 +// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 1 (): Opcode result: multiple operand mismatches, ignoring this opcode +// CHECK:Trying to match opcode PSHUFBrr +// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 1 (): Opcode result: multiple operand mismatches, ignoring this opcode +// CHECK:Trying to match opcode PSHUFBrm +// CHECK: Matching formal operand class MCK_Mem128 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 2 (): match success using generic matcher +// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range 
Opcode result: complete match, selecting this opcode +// CHECK:AsmMatcher: found 2 encodings with mnemonic 'sha1rnds4' +// CHECK:Trying to match opcode SHA1RNDS4rri +// CHECK: Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 2 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 3 (): match success using generic matcher +// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 4: actual operand index out of range Opcode result: complete match, selecting this opcode +// CHECK:AsmMatcher: found 4 encodings with mnemonic 'pinsrw' +// CHECK:Trying to match opcode MMX_PINSRWirri +// CHECK: Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_GR32orGR64 against actual operand at index 2 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 3 (): Opcode result: multiple operand mismatches, ignoring this opcode +// CHECK:Trying to match opcode PINSRWrri +// CHECK: Matching formal operand class MCK_ImmUnsignedi8 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_GR32orGR64 against actual operand at index 2 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_FR32 against actual operand at index 3 (): match success using generic matcher +// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 4: actual operand index out of range Opcode result: complete match, selecting this opcode +// CHECK:AsmMatcher: found 2 encodings with mnemonic 'crc32l' +// CHECK:Trying to match opcode CRC32r32r32 +// CHECK: Matching formal 
operand class MCK_GR32 against actual operand at index 1 (): Opcode result: multiple operand mismatches, ignoring this opcode +// CHECK:Trying to match opcode CRC32r32m32 +// CHECK: Matching formal operand class MCK_Mem32 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_GR32 against actual operand at index 2 (): match success using generic matcher +// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range Opcode result: complete match, selecting this opcode +// CHECK:AsmMatcher: found 4 encodings with mnemonic 'punpcklbw' +// CHECK:Trying to match opcode MMX_PUNPCKLBWirr +// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 2 (): Opcode result: multiple operand mismatches, ignoring this opcode +// CHECK:Trying to match opcode MMX_PUNPCKLBWirm +// CHECK: Matching formal operand class MCK_VR64 against actual operand at index 1 (): match success using generic matcher +// CHECK: Matching formal operand class MCK_Mem64 against actual operand at index 2 (): match success using generic matcher +// CHECK: Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range Opcode result: complete match, selecting this opcode + + +pshufb CPI1_0(%rip), %xmm1 +sha1rnds4 $1, %xmm1, %xmm2 +pinsrw $3, %ecx, %xmm5 +crc32l %gs:0xdeadbeef(%rbx,%rcx,8),%ecx + +.intel_syntax +punpcklbw mm0, qword ptr [rsp] diff --git a/test/SafepointIRVerifier/from-same-relocation-in-phi-nodes.ll b/test/SafepointIRVerifier/from-same-relocation-in-phi-nodes.ll new file mode 100644 index 000000000000..4df19b2d7262 --- /dev/null +++ b/test/SafepointIRVerifier/from-same-relocation-in-phi-nodes.ll @@ -0,0 +1,26 @@ +; XFAIL: * +; RUN: opt -safepoint-ir-verifier-print-only 
-verify-safepoint-ir -S %s 2>&1 | FileCheck %s + +; In %merge %val.unrelocated, %ptr and %arg should be unrelocated. +; FIXME: if this test fails it is a false-positive alarm. IR is correct. +define void @test.unrelocated-phi.ok(i8 addrspace(1)* %arg) gc "statepoint-example" { +; CHECK-LABEL: Verifying gc pointers in function: test.unrelocated-phi.ok + bci_0: + %ptr = getelementptr i8, i8 addrspace(1)* %arg, i64 4 + br i1 undef, label %left, label %right + + left: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) + br label %merge + + right: + br label %merge + + merge: +; CHECK: No illegal uses found by SafepointIRVerifier in: test.unrelocated-phi.ok + %val.unrelocated = phi i8 addrspace(1)* [ %arg, %left ], [ %ptr, %right ] + %c = icmp eq i8 addrspace(1)* %val.unrelocated, %arg + ret void +} + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) diff --git a/test/SafepointIRVerifier/unrecorded-live-at-sp.ll b/test/SafepointIRVerifier/unrecorded-live-at-sp.ll index e3f21c3e7133..5cd4aa741454 100644 --- a/test/SafepointIRVerifier/unrecorded-live-at-sp.ll +++ b/test/SafepointIRVerifier/unrecorded-live-at-sp.ll @@ -1,8 +1,9 @@ ; RUN: opt %s -safepoint-ir-verifier-print-only -verify-safepoint-ir -S 2>&1 | FileCheck %s ; CHECK: Illegal use of unrelocated value found! 
-; CHECK-NEXT: Def: %base_phi3 = phi %jObject addrspace(1)* [ %obj609.relocated, %not_zero146 ], [ %base_phi2, %bci_37-aload ], !is_base_value !0 -; CHECK-NEXT: Use: %base_phi2 = phi %jObject addrspace(1)* [ %base_phi3, %not_zero179 ], [ %cast5, %bci_0 ], !is_base_value !0 +; CHECK-NEXT: Def: %base_phi4 = phi %jObject addrspace(1)* addrspace(1)* [ %addr98.relocated, %not_zero146 ], [ %cast6, %bci_37-aload ], !is_base_value !0 +; CHECK-NEXT: Use: %safepoint_token = tail call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, %jObject addrspace(1)* %base_phi1, %jObject addrspace(1)* addrspace(1)* %base_phi4, %jObject addrspace(1)* addrspace(1)* %relocated4, %jObject addrspace(1)* %relocated7) + %jObject = type { [8 x i8] } diff --git a/test/SafepointIRVerifier/uses-in-phi-nodes.ll b/test/SafepointIRVerifier/uses-in-phi-nodes.ll index d06eb6e0d9a7..bbf98577230d 100644 --- a/test/SafepointIRVerifier/uses-in-phi-nodes.ll +++ b/test/SafepointIRVerifier/uses-in-phi-nodes.ll @@ -14,9 +14,9 @@ define i8 addrspace(1)* @test.not.ok.0(i8 addrspace(1)* %arg) gc "statepoint-exa merge: ; CHECK: Illegal use of unrelocated value found! -; CHECK-NEXT: Def: i8 addrspace(1)* %arg -; CHECK-NEXT: Use: %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right ] - %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right] +; CHECK-NEXT: Def: %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right ] +; CHECK-NEXT: Use: ret i8 addrspace(1)* %val + %val = phi i8 addrspace(1)* [ %arg, %left ], [ %arg, %right ] ret i8 addrspace(1)* %val } @@ -34,9 +34,9 @@ define i8 addrspace(1)* @test.not.ok.1(i8 addrspace(1)* %arg) gc "statepoint-exa merge: ; CHECK: Illegal use of unrelocated value found! 
-; CHECK-NEXT: Def: i8 addrspace(1)* %arg -; CHECK-NEXT: Use: %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ] - %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right] +; CHECK-NEXT: Def: %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ] +; CHECK-NEXT: Use: ret i8 addrspace(1)* %val + %val = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ] ret i8 addrspace(1)* %val } @@ -74,5 +74,99 @@ define i8 addrspace(1)* @test.ok.1(i8 addrspace(1)* %arg) gc "statepoint-example ret i8 addrspace(1)* %val } +; It should be allowed to compare poisoned ptr with null. +define void @test.poisoned.cmp.ok(i8 addrspace(1)* %arg) gc "statepoint-example" { +; CHECK-LABEL: Verifying gc pointers in function: test.poisoned.cmp.ok + bci_0: + br i1 undef, label %left, label %right + + left: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0) + %arg.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; arg, arg + br label %merge + + right: + %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0) + br label %merge + + merge: +; CHECK: No illegal uses found by SafepointIRVerifier in: test.poisoned.cmp.ok + %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ] + %c = icmp eq i8 addrspace(1)* %val.poisoned, null + ret void +} + +; It is illegal to compare poisoned ptr and relocated. 
+define void @test.poisoned.cmp.fail.0(i8 addrspace(1)* %arg) gc "statepoint-example" { +; CHECK-LABEL: Verifying gc pointers in function: test.poisoned.cmp.fail.0 + bci_0: + br i1 undef, label %left, label %right + + left: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0) + %arg.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; arg, arg + br label %merge + + right: + %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0) + %arg.relocated2 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token2, i32 7, i32 7) ; arg, arg + br label %merge + + merge: +; CHECK: Illegal use of unrelocated value found! +; CHECK-NEXT: Def: %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ] +; CHECK-NEXT: Use: %c = icmp eq i8 addrspace(1)* %val.poisoned, %val + %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ] + %val = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg.relocated2, %right ] + %c = icmp eq i8 addrspace(1)* %val.poisoned, %val + ret void +} + +; It is illegal to compare poisoned ptr and unrelocated. +define void @test.poisoned.cmp.fail.1(i8 addrspace(1)* %arg) gc "statepoint-example" { +; CHECK-LABEL: Verifying gc pointers in function: test.poisoned.cmp.fail.1 + bci_0: + br i1 undef, label %left, label %right + + left: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0) + %arg.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7) ; arg, arg + br label %merge + + right: + %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %arg , i32 -1, i32 0, i32 0, i32 0) + %arg.relocated2 = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token2, i32 7, i32 7) ; arg, arg + br label %merge + + merge: +; CHECK: Illegal use of unrelocated value found! +; CHECK-NEXT: Def: %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ] +; CHECK-NEXT: Use: %c = icmp eq i8 addrspace(1)* %val.poisoned, %arg + %val.poisoned = phi i8 addrspace(1)* [ %arg.relocated, %left ], [ %arg, %right ] + %c = icmp eq i8 addrspace(1)* %val.poisoned, %arg + ret void +} + +; It should be allowed to compare unrelocated phi with unrelocated value. +define void @test.unrelocated-phi.cmp.ok(i8 addrspace(1)* %arg) gc "statepoint-example" { +; CHECK-LABEL: Verifying gc pointers in function: test.unrelocated-phi.cmp.ok + bci_0: + br i1 undef, label %left, label %right + + left: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) + br label %merge + + right: + br label %merge + + merge: +; CHECK: No illegal uses found by SafepointIRVerifier in: test.unrelocated-phi.cmp.ok + %val.unrelocated = phi i8 addrspace(1)* [ %arg, %left ], [ null, %right ] + %c = icmp eq i8 addrspace(1)* %val.unrelocated, %arg + ret void +} + declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) 
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) declare void @not_statepoint() diff --git a/test/ThinLTO/X86/Inputs/noinline.ll b/test/ThinLTO/X86/Inputs/noinline.ll new file mode 100644 index 000000000000..73db2912cabc --- /dev/null +++ b/test/ThinLTO/X86/Inputs/noinline.ll @@ -0,0 +1,8 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +define i32 @foo(i32) local_unnamed_addr #0 { + ret i32 10 +} + +attributes #0 = { noinline } diff --git a/test/ThinLTO/X86/noinline.ll b/test/ThinLTO/X86/noinline.ll new file mode 100644 index 000000000000..27f59ab90967 --- /dev/null +++ b/test/ThinLTO/X86/noinline.ll @@ -0,0 +1,26 @@ +; This test checks that ThinLTO doesn't try to import noinline function +; which, when takes place, causes promotion of its callee. +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/noinline.ll -o %t2.bc +; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t3.o \ +; RUN: -save-temps \ +; RUN: -r=%t1.bc,main,px \ +; RUN: -r=%t1.bc,foo, \ +; RUN: -r=%t2.bc,foo,p + +; RUN: llvm-dis %t3.o.1.3.import.bc -o - | FileCheck %s + +; CHECK-NOT: define available_externally i32 @foo + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +; Function Attrs: nounwind ssp uwtable +define i32 @main(i32, i8** nocapture readnone) local_unnamed_addr #0 { + %3 = tail call i32 @foo(i32 %0) #0 + ret i32 %3 +} + +declare i32 @foo(i32) local_unnamed_addr + +attributes #0 = { nounwind } diff --git a/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll b/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll index fde0692d00a2..b05b27f533bb 100644 --- a/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll +++ b/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll @@ -122,6 +122,19 @@ entry: } +define i8 @sub_compare_folding_swapPD256_undef(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @sub_compare_folding_swapPD256_undef( +; 
CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> fsub (<4 x double> undef, <4 x double> undef), <4 x double> zeroinitializer, i32 5, i8 -1) +; CHECK-NEXT: ret i8 [[TMP]] +; +entry: + %sub.i1 = fsub ninf <4 x double> undef, undef + %tmp = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5, i8 -1) + ret i8 %tmp +} + + define i8 @sub_compare_folding_swapPD512(<8 x double> %a, <8 x double> %b){ ; CHECK-LABEL: @sub_compare_folding_swapPD512( ; CHECK-NEXT: entry: diff --git a/test/Transforms/InstCombine/extractelement.ll b/test/Transforms/InstCombine/extractelement.ll new file mode 100644 index 000000000000..66fbd25947dc --- /dev/null +++ b/test/Transforms/InstCombine/extractelement.ll @@ -0,0 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +define i32 @extractelement_out_of_range(<2 x i32> %x) { +; CHECK-LABEL: @extractelement_out_of_range( +; CHECK-NEXT: [[E1:%.*]] = extractelement <2 x i32> [[X:%.*]], i8 16 +; CHECK-NEXT: ret i32 [[E1]] +; + %E1 = extractelement <2 x i32> %x, i8 16 + ret i32 %E1 +} diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll index c6f88fb9cf05..e0698f8b3b77 100644 --- a/test/Transforms/InstCombine/intrinsics.ll +++ b/test/Transforms/InstCombine/intrinsics.ll @@ -267,12 +267,17 @@ define void @powi(double %V, double *%P) { %C = tail call double @llvm.powi.f64(double %V, i32 1) nounwind store volatile double %C, double* %P + + %D = tail call double @llvm.powi.f64(double %V, i32 2) nounwind + store volatile double %D, double* %P ret void ; CHECK-LABEL: @powi( ; CHECK: %A = fdiv double 1.0{{.*}}, %V ; CHECK: store volatile double %A, ; CHECK: store volatile double 1.0 ; CHECK: store volatile double %V +; CHECK: %D = fmul double %V, %V +; CHECK: store volatile double %D } define i32 @cttz(i32 %a) { diff --git 
a/test/Transforms/InstCombine/minmax-fold.ll b/test/Transforms/InstCombine/minmax-fold.ll index 6004a55f0f8e..933aac7e23f2 100644 --- a/test/Transforms/InstCombine/minmax-fold.ll +++ b/test/Transforms/InstCombine/minmax-fold.ll @@ -744,3 +744,158 @@ define <2 x i8> @min_through_cast_vec2(<2 x i32> %x) { %res = select <2 x i1> %cmp, <2 x i8> %x_trunc, <2 x i8> <i8 255, i8 255> ret <2 x i8> %res } + +; Remove a min/max op in a sequence with a common operand. +; PR35717: https://bugs.llvm.org/show_bug.cgi?id=35717 + +; min(min(a, b), min(b, c)) --> min(min(a, b), c) + +define i32 @common_factor_smin(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @common_factor_smin( +; CHECK-NEXT: [[CMP_AB:%.*]] = icmp slt i32 %a, %b +; CHECK-NEXT: [[MIN_AB:%.*]] = select i1 [[CMP_AB]], i32 %a, i32 %b +; CHECK-NEXT: [[CMP_BC:%.*]] = icmp slt i32 %b, %c +; CHECK-NEXT: [[MIN_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c +; CHECK-NEXT: [[CMP_AB_BC:%.*]] = icmp slt i32 [[MIN_AB]], [[MIN_BC]] +; CHECK-NEXT: [[MIN_ABC:%.*]] = select i1 [[CMP_AB_BC]], i32 [[MIN_AB]], i32 [[MIN_BC]] +; CHECK-NEXT: ret i32 [[MIN_ABC]] +; + %cmp_ab = icmp slt i32 %a, %b + %min_ab = select i1 %cmp_ab, i32 %a, i32 %b + %cmp_bc = icmp slt i32 %b, %c + %min_bc = select i1 %cmp_bc, i32 %b, i32 %c + %cmp_ab_bc = icmp slt i32 %min_ab, %min_bc + %min_abc = select i1 %cmp_ab_bc, i32 %min_ab, i32 %min_bc + ret i32 %min_abc +} + +; max(max(a, b), max(c, b)) --> max(max(a, b), c) + +define <2 x i32> @common_factor_smax(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK-LABEL: @common_factor_smax( +; CHECK-NEXT: [[CMP_AB:%.*]] = icmp sgt <2 x i32> %a, %b +; CHECK-NEXT: [[MAX_AB:%.*]] = select <2 x i1> [[CMP_AB]], <2 x i32> %a, <2 x i32> %b +; CHECK-NEXT: [[CMP_CB:%.*]] = icmp sgt <2 x i32> %c, %b +; CHECK-NEXT: [[MAX_CB:%.*]] = select <2 x i1> [[CMP_CB]], <2 x i32> %c, <2 x i32> %b +; CHECK-NEXT: [[CMP_AB_CB:%.*]] = icmp sgt <2 x i32> [[MAX_AB]], [[MAX_CB]] +; CHECK-NEXT: [[MAX_ABC:%.*]] = select <2 x i1> [[CMP_AB_CB]], <2 x 
i32> [[MAX_AB]], <2 x i32> [[MAX_CB]] +; CHECK-NEXT: ret <2 x i32> [[MAX_ABC]] +; + %cmp_ab = icmp sgt <2 x i32> %a, %b + %max_ab = select <2 x i1> %cmp_ab, <2 x i32> %a, <2 x i32> %b + %cmp_cb = icmp sgt <2 x i32> %c, %b + %max_cb = select <2 x i1> %cmp_cb, <2 x i32> %c, <2 x i32> %b + %cmp_ab_cb = icmp sgt <2 x i32> %max_ab, %max_cb + %max_abc = select <2 x i1> %cmp_ab_cb, <2 x i32> %max_ab, <2 x i32> %max_cb + ret <2 x i32> %max_abc +} + +; min(min(b, c), min(a, b)) --> min(min(b, c), a) + +define <2 x i32> @common_factor_umin(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) { +; CHECK-LABEL: @common_factor_umin( +; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ult <2 x i32> %b, %c +; CHECK-NEXT: [[MIN_BC:%.*]] = select <2 x i1> [[CMP_BC]], <2 x i32> %b, <2 x i32> %c +; CHECK-NEXT: [[CMP_AB:%.*]] = icmp ult <2 x i32> %a, %b +; CHECK-NEXT: [[MIN_AB:%.*]] = select <2 x i1> [[CMP_AB]], <2 x i32> %a, <2 x i32> %b +; CHECK-NEXT: [[CMP_BC_AB:%.*]] = icmp ult <2 x i32> [[MIN_BC]], [[MIN_AB]] +; CHECK-NEXT: [[MIN_ABC:%.*]] = select <2 x i1> [[CMP_BC_AB]], <2 x i32> [[MIN_BC]], <2 x i32> [[MIN_AB]] +; CHECK-NEXT: ret <2 x i32> [[MIN_ABC]] +; + %cmp_bc = icmp ult <2 x i32> %b, %c + %min_bc = select <2 x i1> %cmp_bc, <2 x i32> %b, <2 x i32> %c + %cmp_ab = icmp ult <2 x i32> %a, %b + %min_ab = select <2 x i1> %cmp_ab, <2 x i32> %a, <2 x i32> %b + %cmp_bc_ab = icmp ult <2 x i32> %min_bc, %min_ab + %min_abc = select <2 x i1> %cmp_bc_ab, <2 x i32> %min_bc, <2 x i32> %min_ab + ret <2 x i32> %min_abc +} + +; max(max(b, c), max(b, a)) --> max(max(b, c), a) + +define i32 @common_factor_umax(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @common_factor_umax( +; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 %b, %c +; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c +; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 %b, %a +; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a +; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]] +; CHECK-NEXT: [[MAX_ABC:%.*]] = select 
i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]] +; CHECK-NEXT: ret i32 [[MAX_ABC]] +; + %cmp_bc = icmp ugt i32 %b, %c + %max_bc = select i1 %cmp_bc, i32 %b, i32 %c + %cmp_ba = icmp ugt i32 %b, %a + %max_ba = select i1 %cmp_ba, i32 %b, i32 %a + %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba + %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba + ret i32 %max_abc +} + +declare void @extra_use(i32) + +define i32 @common_factor_umax_extra_use_lhs(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @common_factor_umax_extra_use_lhs( +; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 %b, %c +; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c +; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 %b, %a +; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a +; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]] +; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]] +; CHECK-NEXT: call void @extra_use(i32 [[MAX_BC]]) +; CHECK-NEXT: ret i32 [[MAX_ABC]] +; + %cmp_bc = icmp ugt i32 %b, %c + %max_bc = select i1 %cmp_bc, i32 %b, i32 %c + %cmp_ba = icmp ugt i32 %b, %a + %max_ba = select i1 %cmp_ba, i32 %b, i32 %a + %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba + %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba + call void @extra_use(i32 %max_bc) + ret i32 %max_abc +} + +define i32 @common_factor_umax_extra_use_rhs(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @common_factor_umax_extra_use_rhs( +; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 %b, %c +; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c +; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 %b, %a +; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a +; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]] +; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]] +; CHECK-NEXT: call void @extra_use(i32 [[MAX_BA]]) +; CHECK-NEXT: ret i32 [[MAX_ABC]] +; + %cmp_bc = icmp ugt i32 %b, %c + %max_bc = 
select i1 %cmp_bc, i32 %b, i32 %c + %cmp_ba = icmp ugt i32 %b, %a + %max_ba = select i1 %cmp_ba, i32 %b, i32 %a + %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba + %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba + call void @extra_use(i32 %max_ba) + ret i32 %max_abc +} + +define i32 @common_factor_umax_extra_use_both(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @common_factor_umax_extra_use_both( +; CHECK-NEXT: [[CMP_BC:%.*]] = icmp ugt i32 %b, %c +; CHECK-NEXT: [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c +; CHECK-NEXT: [[CMP_BA:%.*]] = icmp ugt i32 %b, %a +; CHECK-NEXT: [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a +; CHECK-NEXT: [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]] +; CHECK-NEXT: [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]] +; CHECK-NEXT: call void @extra_use(i32 [[MAX_BC]]) +; CHECK-NEXT: call void @extra_use(i32 [[MAX_BA]]) +; CHECK-NEXT: ret i32 [[MAX_ABC]] +; + %cmp_bc = icmp ugt i32 %b, %c + %max_bc = select i1 %cmp_bc, i32 %b, i32 %c + %cmp_ba = icmp ugt i32 %b, %a + %max_ba = select i1 %cmp_ba, i32 %b, i32 %a + %cmp_bc_ba = icmp ugt i32 %max_bc, %max_ba + %max_abc = select i1 %cmp_bc_ba, i32 %max_bc, i32 %max_ba + call void @extra_use(i32 %max_bc) + call void @extra_use(i32 %max_ba) + ret i32 %max_abc +} + diff --git a/test/Transforms/InstCombine/minmax-fp.ll b/test/Transforms/InstCombine/minmax-fp.ll index 0851a5d435b8..b94bce2dbb80 100644 --- a/test/Transforms/InstCombine/minmax-fp.ll +++ b/test/Transforms/InstCombine/minmax-fp.ll @@ -155,13 +155,13 @@ define i8 @t13(float %a) { ret i8 %3 } -; <= comparison, where %a could be -0.0. Not safe. +; %a could be -0.0, but it doesn't matter because the conversion to int is the same for 0.0 or -0.0. 
define i8 @t14(float %a) { ; CHECK-LABEL: @t14( -; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule float %a, 0.000000e+00 -; CHECK-NEXT: [[TMP2:%.*]] = fptosi float %a to i8 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 0 -; CHECK-NEXT: ret i8 [[TMP3]] +; CHECK-NEXT: [[DOTINV:%.*]] = fcmp oge float %a, 0.000000e+00 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTINV]], float 0.000000e+00, float %a +; CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i8 +; CHECK-NEXT: ret i8 [[TMP2]] ; %1 = fcmp ule float %a, 0.0 %2 = fptosi float %a to i8 @@ -169,6 +169,19 @@ define i8 @t14(float %a) { ret i8 %3 } +define i8 @t14_commute(float %a) { +; CHECK-LABEL: @t14_commute( +; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt float %a, 0.000000e+00 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float %a, float 0.000000e+00 +; CHECK-NEXT: [[TMP3:%.*]] = fptosi float [[TMP2]] to i8 +; CHECK-NEXT: ret i8 [[TMP3]] +; + %1 = fcmp ule float %a, 0.0 + %2 = fptosi float %a to i8 + %3 = select i1 %1, i8 0, i8 %2 + ret i8 %3 +} + define i8 @t15(float %a) { ; CHECK-LABEL: @t15( ; CHECK-NEXT: [[DOTINV:%.*]] = fcmp nsz oge float %a, 0.000000e+00 diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll index 4e90b337b09c..f71a0fb4c621 100644 --- a/test/Transforms/InstCombine/rem.ll +++ b/test/Transforms/InstCombine/rem.ll @@ -593,3 +593,17 @@ define <2 x i32> @test23(<2 x i32> %A) { %mul = srem <2 x i32> %and, <i32 2147483647, i32 2147483647> ret <2 x i32> %mul } + +; FP division-by-zero is not UB. 
+ +define double @PR34870(i1 %cond, double %x, double %y) { +; CHECK-LABEL: @PR34870( +; CHECK-NEXT: [[SEL:%.*]] = select i1 %cond, double %y, double 0.000000e+00 +; CHECK-NEXT: [[FMOD:%.*]] = frem double %x, [[SEL]] +; CHECK-NEXT: ret double [[FMOD]] +; + %sel = select i1 %cond, double %y, double 0.0 + %fmod = frem double %x, %sel + ret double %fmod +} + diff --git a/test/Transforms/InstSimplify/extract-element.ll b/test/Transforms/InstSimplify/extract-element.ll new file mode 100644 index 000000000000..8ee75a603cd1 --- /dev/null +++ b/test/Transforms/InstSimplify/extract-element.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instsimplify -S | FileCheck %s + +; Weird Types + +define i129 @vec_extract_negidx(<3 x i129> %a) { +; CHECK-LABEL: @vec_extract_negidx( +; CHECK-NEXT: [[E1:%.*]] = extractelement <3 x i129> [[A:%.*]], i129 -1 +; CHECK-NEXT: ret i129 [[E1]] +; + %E1 = extractelement <3 x i129> %a, i129 -1 + ret i129 %E1 +} diff --git a/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll b/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll new file mode 100644 index 000000000000..28db1c834062 --- /dev/null +++ b/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll @@ -0,0 +1,128 @@ +; RUN: opt -loop-unroll -unroll-runtime -unroll-runtime-epilog -S %s | FileCheck %s + +; Test that epilogue is tagged with the same debug information as original loop body rather than original loop exit. 
+ +; CHECK: for.body.i: +; CHECK: br i1 {{.*}}, label %lee1.exit.loopexit.unr-lcssa.loopexit, label %for.body.i, !dbg ![[LOOP_LOC:[0-9]+]] +; CHECK: lee1.exit.loopexit.unr-lcssa.loopexit: +; CHECK: br label %lee1.exit.loopexit.unr-lcssa, !dbg ![[LOOP_LOC]] +; CHECK: lee1.exit.loopexit.unr-lcssa: +; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0, !dbg ![[LOOP_LOC]] +; CHECK: br i1 %lcmp.mod, label %for.body.i.epil.preheader, label %lee1.exit.loopexit, !dbg ![[LOOP_LOC]] +; CHECK: for.body.i.epil.preheader: +; CHECK: br label %for.body.i.epil, !dbg ![[LOOP_LOC]] +; CHECK: lee1.exit.loopexit: +; CHECK: br label %lee1.exit, !dbg ![[EXIT_LOC:[0-9]+]] + +; CHECK-DAG: ![[LOOP_LOC]] = !DILocation(line: 5, column: 3, scope: !{{.*}}, inlinedAt: !{{.*}}) +; CHECK-DAG: ![[EXIT_LOC]] = !DILocation(line: 11, column: 12, scope: !{{.*}}, inlinedAt: !{{.*}}) + +; Function Attrs: nounwind readnone +define i32 @goo(i32 %a, i32 %b) local_unnamed_addr #0 !dbg !8 { +entry: + tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !15), !dbg !16 + tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !14, metadata !15), !dbg !17 + tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !18, metadata !15), !dbg !26 + tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !21, metadata !15), !dbg !28 + tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !15), !dbg !29 + tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !23, metadata !15), !dbg !30 + tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !23, metadata !15), !dbg !30 + tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !15), !dbg !29 + %cmp7.i = icmp eq i32 %b, 0, !dbg !31 + br i1 %cmp7.i, label %lee1.exit, label %for.body.i.preheader, !dbg !33 + +for.body.i.preheader: ; preds = %entry + br label %for.body.i, !dbg !34 + +for.body.i: ; preds = %for.body.i.preheader, %for.body.i + %i.09.i = phi i32 [ %inc.i, 
%for.body.i ], [ 0, %for.body.i.preheader ] + %t.08.i = phi i32 [ %add1.i, %for.body.i ], [ 0, %for.body.i.preheader ] + %div.i = sdiv i32 %t.08.i, 2, !dbg !34 + %add.i = add i32 %t.08.i, %a, !dbg !35 + %add1.i = add i32 %add.i, %div.i, !dbg !36 + tail call void @llvm.dbg.value(metadata i32 %add1.i, i64 0, metadata !22, metadata !15), !dbg !29 + %inc.i = add nuw i32 %i.09.i, 1, !dbg !37 + tail call void @llvm.dbg.value(metadata i32 %inc.i, i64 0, metadata !23, metadata !15), !dbg !30 + tail call void @llvm.dbg.value(metadata i32 %inc.i, i64 0, metadata !23, metadata !15), !dbg !30 + tail call void @llvm.dbg.value(metadata i32 %add1.i, i64 0, metadata !22, metadata !15), !dbg !29 + %exitcond.i = icmp eq i32 %inc.i, %b, !dbg !31 + br i1 %exitcond.i, label %lee1.exit.loopexit, label %for.body.i, !dbg !33, !llvm.loop !38 + +lee1.exit.loopexit: ; preds = %for.body.i + %add1.i.lcssa = phi i32 [ %add1.i, %for.body.i ] + br label %lee1.exit, !dbg !41 + +lee1.exit: ; preds = %lee1.exit.loopexit, %entry + %t.0.lcssa.i = phi i32 [ 0, %entry ], [ %add1.i.lcssa, %lee1.exit.loopexit ] + tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !44, metadata !15), !dbg !47 + tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !45, metadata !15), !dbg !48 + %add.i4 = add nsw i32 %b, %a, !dbg !41 + %sub.i = sub nsw i32 %a, %b, !dbg !49 + %mul.i = mul nsw i32 %add.i4, %sub.i, !dbg !50 + %add = add nsw i32 %t.0.lcssa.i, %mul.i, !dbg !51 + ret i32 %add, !dbg !52 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 + +attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" 
"target-features"="+neon,+strict-align,+vfp3,-crypto,-d16,-fp-armv8,-fp-only-sp,-fp16,-vfp4" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6} +!llvm.ident = !{!7} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Snapdragon LLVM ARM Compiler 4.0.5 (based on llvm.org 4.0+)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "t.c", directory: "/prj/llvm-arm/scratch1/zhaoshiz/bugs/debug-symbol") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 1, !"min_enum_size", i32 4} +!7 = !{!"Snapdragon LLVM ARM Compiler 4.0.5 (based on llvm.org 4.0+)"} +!8 = distinct !DISubprogram(name: "goo", scope: !1, file: !1, line: 23, type: !9, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !12) +!9 = !DISubroutineType(types: !10) +!10 = !{!11, !11, !11} +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !{!13, !14} +!13 = !DILocalVariable(name: "a", arg: 1, scope: !8, file: !1, line: 23, type: !11) +!14 = !DILocalVariable(name: "b", arg: 2, scope: !8, file: !1, line: 23, type: !11) +!15 = !DIExpression() +!16 = !DILocation(line: 23, column: 14, scope: !8) +!17 = !DILocation(line: 23, column: 21, scope: !8) +!18 = !DILocalVariable(name: "a", arg: 1, scope: !19, file: !1, line: 3, type: !11) +!19 = distinct !DISubprogram(name: "lee1", scope: !1, file: !1, line: 3, type: !9, isLocal: true, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !20) +!20 = !{!18, !21, !22, !23} +!21 = !DILocalVariable(name: "b", arg: 2, scope: !19, file: !1, line: 3, type: !11) +!22 = !DILocalVariable(name: "t", scope: !19, file: !1, line: 4, type: !11) +!23 = !DILocalVariable(name: "i", scope: !24, file: 
!1, line: 5, type: !25) +!24 = distinct !DILexicalBlock(scope: !19, file: !1, line: 5, column: 3) +!25 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!26 = !DILocation(line: 3, column: 22, scope: !19, inlinedAt: !27) +!27 = distinct !DILocation(line: 24, column: 27, scope: !8) +!28 = !DILocation(line: 3, column: 29, scope: !19, inlinedAt: !27) +!29 = !DILocation(line: 4, column: 7, scope: !19, inlinedAt: !27) +!30 = !DILocation(line: 5, column: 17, scope: !24, inlinedAt: !27) +!31 = !DILocation(line: 5, column: 23, scope: !32, inlinedAt: !27) +!32 = distinct !DILexicalBlock(scope: !24, file: !1, line: 5, column: 3) +!33 = !DILocation(line: 5, column: 3, scope: !24, inlinedAt: !27) +!34 = !DILocation(line: 6, column: 13, scope: !32, inlinedAt: !27) +!35 = !DILocation(line: 6, column: 11, scope: !32, inlinedAt: !27) +!36 = !DILocation(line: 6, column: 7, scope: !32, inlinedAt: !27) +!37 = !DILocation(line: 5, column: 28, scope: !32, inlinedAt: !27) +!38 = distinct !{!38, !39, !40} +!39 = !DILocation(line: 5, column: 3, scope: !24) +!40 = !DILocation(line: 6, column: 14, scope: !24) +!41 = !DILocation(line: 11, column: 12, scope: !42, inlinedAt: !46) +!42 = distinct !DISubprogram(name: "lee2", scope: !1, file: !1, line: 10, type: !9, isLocal: true, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !43) +!43 = !{!44, !45} +!44 = !DILocalVariable(name: "a", arg: 1, scope: !42, file: !1, line: 10, type: !11) +!45 = !DILocalVariable(name: "b", arg: 2, scope: !42, file: !1, line: 10, type: !11) +!46 = distinct !DILocation(line: 24, column: 40, scope: !8) +!47 = !DILocation(line: 10, column: 22, scope: !42, inlinedAt: !46) +!48 = !DILocation(line: 10, column: 29, scope: !42, inlinedAt: !46) +!49 = !DILocation(line: 11, column: 20, scope: !42, inlinedAt: !46) +!50 = !DILocation(line: 11, column: 16, scope: !42, inlinedAt: !46) +!51 = !DILocation(line: 24, column: 38, scope: !8) +!52 = 
!DILocation(line: 24, column: 3, scope: !8) diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll index d180980c95b9..32463373ca99 100644 --- a/test/Transforms/LoopUnroll/runtime-loop1.ll +++ b/test/Transforms/LoopUnroll/runtime-loop1.ll @@ -13,9 +13,11 @@ ; EPILOG: br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]] ; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body ; EPILOG: for.body.epil.preheader: -; EPILOG: br label %for.body.epil, !dbg [[EXIT_LOC:![0-9]+]] +; EPILOG: br label %for.body.epil, !dbg [[BODY_LOC]] ; EPILOG: for.body.epil: -; EPILOG: br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC:![0-9]+]] +; EPILOG: br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC]] +; EPILOG: for.end.loopexit: +; EPILOG: br label %for.end, !dbg [[EXIT_LOC:![0-9]+]] ; EPILOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}}) ; EPILOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}}) diff --git a/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll b/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll deleted file mode 100644 index e3d1f6dd2b17..000000000000 --- a/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll +++ /dev/null @@ -1,48 +0,0 @@ -; RUN: opt < %s -memcpyopt -S | FileCheck %s -; Test memcpy-memcpy dependencies across invoke edges. - -; Test that memcpyopt works across the non-unwind edge of an invoke. 
- -define hidden void @test_normal(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %temp = alloca i8, i32 64 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) - invoke void @invoke_me() - to label %try.cont unwind label %lpad - -lpad: - landingpad { i8*, i32 } - catch i8* null - ret void - -try.cont: - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false) - ret void -} - -; Test that memcpyopt works across the unwind edge of an invoke. - -define hidden void @test_unwind(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %temp = alloca i8, i32 64 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) - invoke void @invoke_me() - to label %try.cont unwind label %lpad - -lpad: - landingpad { i8*, i32 } - catch i8* null - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false) - ret void - -try.cont: - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) -declare i32 @__gxx_personality_v0(...) 
-declare void @invoke_me() readnone diff --git a/test/Transforms/MemCpyOpt/merge-into-memset.ll b/test/Transforms/MemCpyOpt/merge-into-memset.ll deleted file mode 100644 index fc31038a4e6d..000000000000 --- a/test/Transforms/MemCpyOpt/merge-into-memset.ll +++ /dev/null @@ -1,45 +0,0 @@ -; RUN: opt < %s -memcpyopt -S | FileCheck %s -; Update cached non-local dependence information when merging stores into memset. - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -; Don't delete the memcpy in %if.then, even though it depends on an instruction -; which will be deleted. - -; CHECK-LABEL: @foo -define void @foo(i1 %c, i8* %d, i8* %e, i8* %f) { -entry: - %tmp = alloca [50 x i8], align 8 - %tmp4 = bitcast [50 x i8]* %tmp to i8* - %tmp1 = getelementptr inbounds i8, i8* %tmp4, i64 1 - call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i32 1, i1 false), !dbg !5 - store i8 0, i8* %tmp4, align 8, !dbg !5 -; CHECK: call void @llvm.memset.p0i8.i64(i8* nonnull %d, i8 0, i64 10, i32 1, i1 false), !dbg !5 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %tmp1, i8* nonnull %d, i64 10, i32 1, i1 false) - br i1 %c, label %if.then, label %exit - -if.then: -; CHECK: if.then: -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* nonnull %tmp4, i64 30, i32 8, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* nonnull %tmp4, i64 30, i32 8, i1 false) - br label %exit - -exit: - ret void -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) -declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1) - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4} - -!0 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "t.rs", directory: "/tmp") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !DILocation(line: 8, column: 5, scope: !6) -!6 = distinct !DISubprogram(name: "bar", scope: 
!1, file: !1, line: 5, type: !7, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) -!7 = !DISubroutineType(types: !8) -!8 = !{null} diff --git a/test/Transforms/MemCpyOpt/mixed-sizes.ll b/test/Transforms/MemCpyOpt/mixed-sizes.ll deleted file mode 100644 index 9091fe7f56c0..000000000000 --- a/test/Transforms/MemCpyOpt/mixed-sizes.ll +++ /dev/null @@ -1,36 +0,0 @@ -; RUN: opt < %s -memcpyopt -S | FileCheck %s -; Handle memcpy-memcpy dependencies of differing sizes correctly. - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -; Don't delete the second memcpy, even though there's an earlier -; memcpy with a larger size from the same address. - -; CHECK-LABEL: @foo -define i32 @foo(i1 %z) { -entry: - %a = alloca [10 x i32] - %s = alloca [10 x i32] - %0 = bitcast [10 x i32]* %a to i8* - %1 = bitcast [10 x i32]* %s to i8* - call void @llvm.memset.p0i8.i64(i8* nonnull %1, i8 0, i64 40, i32 16, i1 false) - %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %a, i64 0, i64 0 - store i32 1, i32* %arrayidx - %scevgep = getelementptr [10 x i32], [10 x i32]* %s, i64 0, i64 1 - %scevgep7 = bitcast i32* %scevgep to i8* - br i1 %z, label %for.body3.lr.ph, label %for.inc7.1 - -for.body3.lr.ph: ; preds = %entry - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 17179869180, i32 4, i1 false) - br label %for.inc7.1 - -for.inc7.1: -; CHECK: for.inc7.1: - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %scevgep7, i64 4, i32 4, i1 false) - %2 = load i32, i32* %arrayidx - ret i32 %2 -} - -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) -declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1) diff --git a/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll b/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll deleted file mode 100644 index 5b0510211d9f..000000000000 --- 
a/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll +++ /dev/null @@ -1,114 +0,0 @@ -; RUN: opt < %s -memcpyopt -S | FileCheck %s -; Make sure memcpy-memcpy dependence is optimized across -; basic blocks (conditional branches and invokes). - -%struct.s = type { i32, i32 } - -@s_foo = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4 -@s_baz = private unnamed_addr constant %struct.s { i32 1, i32 2 }, align 4 -@i = external constant i8* - -declare void @qux() -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) -declare void @__cxa_throw(i8*, i8*, i8*) -declare i32 @__gxx_personality_v0(...) -declare i8* @__cxa_begin_catch(i8*) - -; A simple partial redundancy. Test that the second memcpy is optimized -; to copy directly from the original source rather than from the temporary. - -; CHECK-LABEL: @wobble -define void @wobble(i8* noalias %dst, i8* %src, i1 %some_condition) { -bb: - %temp = alloca i8, i32 64 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %temp, i8* nonnull %src, i64 64, i32 8, i1 false) - br i1 %some_condition, label %more, label %out - -out: - call void @qux() - unreachable - -more: - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %temp, i64 64, i32 8, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 64, i32 8, i1 false) - ret void -} - -; A CFG triangle with a partial redundancy targeting an alloca. Test that the -; memcpy inside the triangle is optimized to copy directly from the original -; source rather than from the temporary. 
- -; CHECK-LABEL: @foo -define i32 @foo(i1 %t3) { -bb: - %s = alloca %struct.s, align 4 - %t = alloca %struct.s, align 4 - %s1 = bitcast %struct.s* %s to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s1, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s1, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false) - br i1 %t3, label %bb4, label %bb7 - -bb4: ; preds = %bb - %t5 = bitcast %struct.s* %t to i8* - %s6 = bitcast %struct.s* %s to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t5, i8* %s6, i64 8, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t5, i8* bitcast (%struct.s* @s_foo to i8*), i64 8, i32 4, i1 false) - br label %bb7 - -bb7: ; preds = %bb4, %bb - %t8 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 0 - %t9 = load i32, i32* %t8, align 4 - %t10 = getelementptr %struct.s, %struct.s* %t, i32 0, i32 1 - %t11 = load i32, i32* %t10, align 4 - %t12 = add i32 %t9, %t11 - ret i32 %t12 -} - -; A CFG diamond with an invoke on one side, and a partially redundant memcpy -; into an alloca on the other. Test that the memcpy inside the diamond is -; optimized to copy ; directly from the original source rather than from the -; temporary. This more complex test represents a relatively common usage -; pattern. 
- -; CHECK-LABEL: @baz -define i32 @baz(i1 %t5) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -bb: - %s = alloca %struct.s, align 4 - %t = alloca %struct.s, align 4 - %s3 = bitcast %struct.s* %s to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s3, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s3, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false) - br i1 %t5, label %bb6, label %bb22 - -bb6: ; preds = %bb - invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null) - to label %bb25 unwind label %bb9 - -bb9: ; preds = %bb6 - %t10 = landingpad { i8*, i32 } - catch i8* null - br label %bb13 - -bb13: ; preds = %bb9 - %t15 = call i8* @__cxa_begin_catch(i8* null) - br label %bb23 - -bb22: ; preds = %bb - %t23 = bitcast %struct.s* %t to i8* - %s24 = bitcast %struct.s* %s to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t23, i8* %s24, i64 8, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t23, i8* bitcast (%struct.s* @s_baz to i8*), i64 8, i32 4, i1 false) - br label %bb23 - -bb23: ; preds = %bb22, %bb13 - %t17 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 0 - %t18 = load i32, i32* %t17, align 4 - %t19 = getelementptr inbounds %struct.s, %struct.s* %t, i32 0, i32 1 - %t20 = load i32, i32* %t19, align 4 - %t21 = add nsw i32 %t18, %t20 - ret i32 %t21 - -bb25: ; preds = %bb6 - unreachable -} diff --git a/test/Transforms/RewriteStatepointsForGC/check_traversal_order.ll b/test/Transforms/RewriteStatepointsForGC/check_traversal_order.ll new file mode 100644 index 000000000000..57e35ccad638 --- /dev/null +++ b/test/Transforms/RewriteStatepointsForGC/check_traversal_order.ll @@ -0,0 +1,38 @@ +; RUN: opt -S -rewrite-statepoints-for-gc < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +declare void @f() +declare void @g(i8 addrspace(1)*, i8 
addrspace(1)*) +declare i32 @personality_function() + +; Make sure that we do not fail assertion because we process call of @g before +; we process the call of @f. + +define void @test_01(i8 addrspace(1)* %p, i1 %cond) gc "statepoint-example" personality i32 ()* @personality_function { + +; CHECK-LABEL: @test_01( + +entry: + %tmp0 = insertelement <2 x i8 addrspace(1)*> undef, i8 addrspace(1)* %p, i32 0 + %tmp1 = insertelement <2 x i8 addrspace(1)*> %tmp0, i8 addrspace(1)* %p, i32 1 + %tmp2 = extractelement <2 x i8 addrspace(1)*> %tmp1, i32 1 + %tmp3 = extractelement <2 x i8 addrspace(1)*> %tmp1, i32 0 + br label %loop + +loop: + br i1 %cond, label %cond_block, label %exit + +cond_block: + br i1 %cond, label %backedge, label %exit + +exit: + %tmp4 = phi i8 addrspace(1)* [ %tmp2, %loop ], [ %tmp2, %cond_block ] + call void @g(i8 addrspace(1)* %tmp3, i8 addrspace(1)* %tmp4) + ret void + +backedge: + call void @f() + br label %loop +} diff --git a/test/Transforms/SimplifyCFG/X86/if-conversion.ll b/test/Transforms/SimplifyCFG/X86/if-conversion.ll deleted file mode 100644 index 28702572d480..000000000000 --- a/test/Transforms/SimplifyCFG/X86/if-conversion.ll +++ /dev/null @@ -1,231 +0,0 @@ -; RUN: opt < %s -simplifycfg -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -S | FileCheck %s -; Avoid if-conversion if there is a long dependence chain. - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -; The first several cases test FindLongDependenceChain returns true, so -; if-conversion is blocked. 
- -define i64 @test1(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - ret i64 %val - -; CHECK-NOT: select -} - -define i64 @test2(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = add i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - ret i64 %val - -; CHECK-LABEL: @test2 -; CHECK-NOT: select -} - -; The following cases test FindLongDependenceChain returns false, so -; if-conversion will proceed. - -; Non trivial LatencyAdjustment. -define i64 @test3(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %p4 = add i64 %p3, %1 - %ptr = inttoptr i64 %p4 to i64* - %val = load i64, i64* %ptr, align 8 - ret i64 %val - -; CHECK-LABEL: @test3 -; CHECK: select -} - -; Short dependence chain. 
-define i64 @test4(i64* %pp, i64* %p) { -entry: - %0 = load i64, i64* %pp, align 8 - %cmp = icmp slt i64 %0, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - ret i64 %val - -; CHECK-LABEL: @test4 -; CHECK: select -} - -; High IPC. -define i64 @test5(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - %2 = add i64 %pint, 2 - %3 = add i64 %pint, 3 - %4 = or i64 %pint, 16 - %5 = and i64 %pint, 255 - - %6 = or i64 %2, 9 - %7 = and i64 %3, 255 - %8 = add i64 %4, 4 - %9 = add i64 %5, 5 - - %10 = add i64 %6, 2 - %11 = add i64 %7, 3 - %12 = add i64 %8, 4 - %13 = add i64 %9, 5 - - %14 = add i64 %10, 6 - %15 = add i64 %11, 7 - %16 = add i64 %12, 8 - %17 = add i64 %13, 9 - - %18 = add i64 %14, 10 - %19 = add i64 %15, 11 - %20 = add i64 %16, 12 - %21 = add i64 %17, 13 - - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - - ret i64 %val - -; CHECK-LABEL: @test5 -; CHECK: select -} - -; Large BB size. 
-define i64 @test6(i64** %pp, i64* %p) { -entry: - %0 = load i64*, i64** %pp, align 8 - %1 = load i64, i64* %0, align 8 - %cmp = icmp slt i64 %1, 0 - %pint = ptrtoint i64* %p to i64 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: - %p1 = add i64 %pint, 8 - br label %cond.end - -cond.false: - %p2 = or i64 %pint, 16 - br label %cond.end - -cond.end: - %p3 = phi i64 [%p1, %cond.true], [%p2, %cond.false] - %ptr = inttoptr i64 %p3 to i64* - %val = load i64, i64* %ptr, align 8 - %2 = add i64 %pint, 2 - %3 = add i64 %pint, 3 - %4 = add i64 %2, 4 - %5 = add i64 %3, 5 - %6 = add i64 %4, 6 - %7 = add i64 %5, 7 - %8 = add i64 %6, 6 - %9 = add i64 %7, 7 - %10 = add i64 %8, 6 - %11 = add i64 %9, 7 - %12 = add i64 %10, 6 - %13 = add i64 %11, 7 - %14 = add i64 %12, 6 - %15 = add i64 %13, 7 - %16 = add i64 %14, 6 - %17 = add i64 %15, 7 - %18 = add i64 %16, 6 - %19 = add i64 %17, 7 - %20 = add i64 %18, 6 - %21 = add i64 %19, 7 - %22 = add i64 %20, 6 - %23 = add i64 %21, 7 - %24 = add i64 %22, 6 - %25 = add i64 %23, 7 - %26 = add i64 %24, 6 - %27 = add i64 %25, 7 - %28 = add i64 %26, 6 - %29 = add i64 %27, 7 - %30 = add i64 %28, 6 - %31 = add i64 %29, 7 - %32 = add i64 %30, 8 - %33 = add i64 %31, 9 - %34 = add i64 %32, %33 - %35 = and i64 %34, 255 - %res = add i64 %val, %35 - - ret i64 %res - -; CHECK-LABEL: @test6 -; CHECK: select -} diff --git a/test/tools/llvm-cov/cov-comdat.test b/test/tools/llvm-cov/cov-comdat.test index 9d2271636994..e8018d58be62 100644 --- a/test/tools/llvm-cov/cov-comdat.test +++ b/test/tools/llvm-cov/cov-comdat.test @@ -9,7 +9,7 @@ REQUIRES: shell // RUN: llvm-cov show %S/Inputs/binary-formats.v1.linux64l -instr-profile %S/Inputs/elf_binary_comdat.profdata -path-equivalence=/tmp,%S/Inputs %S/Inputs/instrprof-comdat.h -dump 2> %t.err | FileCheck --check-prefix=HEADER %S/Inputs/instrprof-comdat.h // RUN: FileCheck --check-prefix=ERROR -input-file %t.err %s -// ERROR: hash-mismatch: No profile record found for 'main' with hash = 0xA +// ERROR: 
hash-mismatch: No profile record found for 'main' with hash = 0xa // RUN: llvm-cov show %S/Inputs/binary-formats.v2.linux64l -instr-profile %S/Inputs/elf_binary_comdat.profdata -path-equivalence=/root/llvm/test/tools,%S/.. %S/Inputs/instrprof-comdat.h | FileCheck --check-prefix=HEADER %S/Inputs/instrprof-comdat.h // RUN: llvm-cov show %S/Inputs/binary-formats.v2.linux32l -instr-profile %S/Inputs/elf_binary_comdat.profdata -path-equivalence=/root/llvm/R/../test/tools,%S/.. %S/Inputs/instrprof-comdat.h | FileCheck --check-prefix=HEADER %S/Inputs/instrprof-comdat.h diff --git a/test/tools/llvm-objdump/X86/hex-displacement.test b/test/tools/llvm-objdump/X86/hex-displacement.test index dd2332e572f0..541cca53869b 100644 --- a/test/tools/llvm-objdump/X86/hex-displacement.test +++ b/test/tools/llvm-objdump/X86/hex-displacement.test @@ -3,4 +3,4 @@ # RUN: llvm-objdump -d %p/Inputs/hello.exe.macho-i386 | FileCheck %s -# CHECK: 1f47: e8 00 00 00 00 calll 0 <_main+0xC> +# CHECK: 1f47: e8 00 00 00 00 calll 0 <_main+0xc> diff --git a/test/tools/llvm-readobj/Inputs/needed-libs.obj.coff-am64 b/test/tools/llvm-readobj/Inputs/needed-libs.obj.coff-am64 Binary files differnew file mode 100644 index 000000000000..4cfc6e25396f --- /dev/null +++ b/test/tools/llvm-readobj/Inputs/needed-libs.obj.coff-am64 diff --git a/test/tools/llvm-readobj/coff-needed-libs.test b/test/tools/llvm-readobj/coff-needed-libs.test new file mode 100644 index 000000000000..deb6bc299eb5 --- /dev/null +++ b/test/tools/llvm-readobj/coff-needed-libs.test @@ -0,0 +1,5 @@ +RUN: llvm-readobj -needed-libs %p/Inputs/needed-libs.obj.coff-am64 | FileCheck %s + +CHECK: NeededLibraries [ +CHECK-NEXT: KERNEL32.dll +CHECK-NEXT: ] |