diff options
Diffstat (limited to 'test')
| -rw-r--r-- | test/Bitcode/compatibility-3.6.ll | 4 | ||||
| -rw-r--r-- | test/Bitcode/compatibility-3.7.ll | 4 | ||||
| -rw-r--r-- | test/Bitcode/compatibility-3.8.ll | 8 | ||||
| -rw-r--r-- | test/Bitcode/compatibility-3.9.ll | 8 | ||||
| -rw-r--r-- | test/Bitcode/compatibility-4.0.ll | 8 | ||||
| -rw-r--r-- | test/Bitcode/compatibility-5.0.ll | 8 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir | 131 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir | 88 | ||||
| -rw-r--r-- | test/CodeGen/AMDGPU/smrd.ll | 34 | ||||
| -rw-r--r-- | test/CodeGen/PowerPC/pr36292.ll | 46 | ||||
| -rw-r--r-- | test/CodeGen/X86/clwb.ll | 7 | ||||
| -rw-r--r-- | test/Transforms/InstCombine/pr36362.ll | 17 | ||||
| -rw-r--r-- | test/Transforms/LICM/sinking.ll | 61 | ||||
| -rw-r--r-- | test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll | 2 | ||||
| -rw-r--r-- | test/Transforms/LoopVectorize/pr35773.ll | 2 | ||||
| -rw-r--r-- | test/Transforms/LoopVectorize/reduction-small-size.ll | 37 | ||||
| -rw-r--r-- | test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll | 4 | ||||
| -rw-r--r-- | test/tools/llvm-config/system-libs.windows.test | 2 |
18 files changed, 421 insertions, 50 deletions
diff --git a/test/Bitcode/compatibility-3.6.ll b/test/Bitcode/compatibility-3.6.ll index 6c47a853e24a..e9313dfba870 100644 --- a/test/Bitcode/compatibility-3.6.ll +++ b/test/Bitcode/compatibility-3.6.ll @@ -612,9 +612,7 @@ define void @fastmathflags(float %op1, float %op2) { %f.arcp = fadd arcp float %op1, %op2 ; CHECK: %f.arcp = fadd arcp float %op1, %op2 %f.fast = fadd fast float %op1, %op2 - ; 'fast' used to be its own bit, but this changed in Oct 2017. - ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. - ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2 + ; CHECK: %f.fast = fadd fast float %op1, %op2 ret void } diff --git a/test/Bitcode/compatibility-3.7.ll b/test/Bitcode/compatibility-3.7.ll index 55844e5c4986..82fc99055357 100644 --- a/test/Bitcode/compatibility-3.7.ll +++ b/test/Bitcode/compatibility-3.7.ll @@ -656,9 +656,7 @@ define void @fastmathflags(float %op1, float %op2) { %f.arcp = fadd arcp float %op1, %op2 ; CHECK: %f.arcp = fadd arcp float %op1, %op2 %f.fast = fadd fast float %op1, %op2 - ; 'fast' used to be its own bit, but this changed in Oct 2017. - ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. - ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2 + ; CHECK: %f.fast = fadd fast float %op1, %op2 ret void } diff --git a/test/Bitcode/compatibility-3.8.ll b/test/Bitcode/compatibility-3.8.ll index a7fa20f2bc08..2e70a380d10e 100644 --- a/test/Bitcode/compatibility-3.8.ll +++ b/test/Bitcode/compatibility-3.8.ll @@ -687,9 +687,7 @@ define void @fastmathflags(float %op1, float %op2) { %f.arcp = fadd arcp float %op1, %op2 ; CHECK: %f.arcp = fadd arcp float %op1, %op2 %f.fast = fadd fast float %op1, %op2 - ; 'fast' used to be its own bit, but this changed in Oct 2017. - ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. - ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2 + ; CHECK: %f.fast = fadd fast float %op1, %op2 ret void } @@ -702,9 +700,7 @@ declare <4 x double> @fmf3() ; CHECK-LABEL: fastMathFlagsForCalls( define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) { %call.fast = call fast float @fmf1() - ; 'fast' used to be its own bit, but this changed in Oct 2017. - ; The binary test file does not have the newer 'contract' and 'aml' bits set, so this is not fully 'fast'. - ; CHECK: %call.fast = call reassoc nnan ninf nsz arcp float @fmf1() + ; CHECK: %call.fast = call fast float @fmf1() ; Throw in some other attributes to make sure those stay in the right places. diff --git a/test/Bitcode/compatibility-3.9.ll b/test/Bitcode/compatibility-3.9.ll index c456fefe9d40..7c84daa7d3c4 100644 --- a/test/Bitcode/compatibility-3.9.ll +++ b/test/Bitcode/compatibility-3.9.ll @@ -758,9 +758,7 @@ define void @fastmathflags(float %op1, float %op2) { %f.arcp = fadd arcp float %op1, %op2 ; CHECK: %f.arcp = fadd arcp float %op1, %op2 %f.fast = fadd fast float %op1, %op2 - ; 'fast' used to be its own bit, but this changed in Oct 2017. - ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. - ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2 + ; CHECK: %f.fast = fadd fast float %op1, %op2 ret void } @@ -773,9 +771,7 @@ declare <4 x double> @fmf3() ; CHECK-LABEL: fastMathFlagsForCalls( define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) { %call.fast = call fast float @fmf1() - ; 'fast' used to be its own bit, but this changed in Oct 2017. - ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. - ; CHECK: %call.fast = call reassoc nnan ninf nsz arcp float @fmf1() + ; CHECK: %call.fast = call fast float @fmf1() ; Throw in some other attributes to make sure those stay in the right places. diff --git a/test/Bitcode/compatibility-4.0.ll b/test/Bitcode/compatibility-4.0.ll index 68446a7d5b0a..9e34d48c95f7 100644 --- a/test/Bitcode/compatibility-4.0.ll +++ b/test/Bitcode/compatibility-4.0.ll @@ -757,10 +757,8 @@ define void @fastmathflags(float %op1, float %op2) { ; CHECK: %f.nsz = fadd nsz float %op1, %op2 %f.arcp = fadd arcp float %op1, %op2 ; CHECK: %f.arcp = fadd arcp float %op1, %op2 - ; 'fast' used to be its own bit, but this changed in Oct 2017. - ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. %f.fast = fadd fast float %op1, %op2 - ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp float %op1, %op2 + ; CHECK: %f.fast = fadd fast float %op1, %op2 ret void } @@ -773,9 +771,7 @@ declare <4 x double> @fmf3() ; CHECK-LABEL: fastMathFlagsForCalls( define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) { %call.fast = call fast float @fmf1() - ; 'fast' used to be its own bit, but this changed in Oct 2017. - ; The binary test file does not have the newer 'contract' and 'afn' bits set, so this is not fully 'fast'. - ; CHECK: %call.fast = call reassoc nnan ninf nsz arcp float @fmf1() + ; CHECK: %call.fast = call fast float @fmf1() ; Throw in some other attributes to make sure those stay in the right places. diff --git a/test/Bitcode/compatibility-5.0.ll b/test/Bitcode/compatibility-5.0.ll index cdadc032d87b..a4b3fca82b7b 100644 --- a/test/Bitcode/compatibility-5.0.ll +++ b/test/Bitcode/compatibility-5.0.ll @@ -765,9 +765,7 @@ define void @fastmathflags(float %op1, float %op2) { %f.contract = fadd contract float %op1, %op2 ; CHECK: %f.contract = fadd contract float %op1, %op2 %f.fast = fadd fast float %op1, %op2 - ; 'fast' used to be its own bit, but this changed in Oct 2017. - ; The binary test file does not have the newer 'afn' bit set, so this is not fully 'fast'. - ; CHECK: %f.fast = fadd reassoc nnan ninf nsz arcp contract float %op1, %op2 + ; CHECK: %f.fast = fadd fast float %op1, %op2 ret void } @@ -780,9 +778,7 @@ declare <4 x double> @fmf3() ; CHECK-LABEL: fastMathFlagsForCalls( define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) { %call.fast = call fast float @fmf1() - ; 'fast' used to be its own bit, but this changed in Oct 2017. - ; The binary test file does not have the newer 'afn' bit set, so this is not fully 'fast'. - ; CHECK: %call.fast = call reassoc nnan ninf nsz arcp contract float @fmf1() + ; CHECK: %call.fast = call fast float @fmf1() ; Throw in some other attributes to make sure those stay in the right places. diff --git a/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir b/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir new file mode 100644 index 000000000000..fd1998037d38 --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir @@ -0,0 +1,131 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64-unknown-unknown -o - -global-isel -verify-machineinstrs -run-pass=instruction-select %s | FileCheck %s + +# PR36345 +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64-arm-none-eabi" + + ; Function Attrs: noinline nounwind optnone + define void @fp16_to_gpr([2 x half], [2 x half]* %addr) { + ret void + } + + define void @gpr_to_fp16() { + ret void + } + + define void @gpr_to_fp16_physreg() { + ret void + } +... +--- +name: fp16_to_gpr +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: fpr } + - { id: 2, class: fpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } + - { id: 9, class: gpr } + - { id: 10, class: gpr } + - { id: 11, class: gpr } + - { id: 12, class: gpr } +body: | + bb.1 (%ir-block.1): + liveins: %h0, %h1, %x0 + + ; CHECK-LABEL: name: fp16_to_gpr + ; CHECK: liveins: %h0, %h1, %x0 + ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY %h0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY %h1 + ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY]], %subreg.hsub + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]] + ; CHECK: [[BFMWri:%[0-9]+]]:gpr32 = BFMWri [[DEF]], [[COPY2]], 0, 15 + ; CHECK: [[SUBREG_TO_REG1:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[COPY1]], %subreg.hsub + ; CHECK: [[COPY3:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG1]] + ; CHECK: [[BFMWri1:%[0-9]+]]:gpr32 = BFMWri [[BFMWri]], [[COPY3]], 16, 15 + ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[BFMWri1]] + ; CHECK: [[COPY5:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: STRWui [[COPY4]], [[COPY5]], 0 :: (store 4 into %ir.addr, align 2) + ; CHECK: RET_ReallyLR + %1:fpr(s16) = COPY %h0 + %2:fpr(s16) = COPY %h1 + %3:gpr(s32) = G_IMPLICIT_DEF + %11:gpr(s16) = COPY %1(s16) + %4:gpr(s32) = G_INSERT %3, %11(s16), 0 + %12:gpr(s16) = COPY %2(s16) + %5:gpr(s32) = G_INSERT %4, %12(s16), 16 + %0:gpr(s32) = COPY %5(s32) + %6:gpr(p0) = COPY %x0 + G_STORE %0(s32), %6(p0) :: (store 4 into %ir.addr, align 2) + RET_ReallyLR + +... + +--- +name: gpr_to_fp16 +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: fpr } +body: | + bb.1 (%ir-block.0): + liveins: %w0 + + ; CHECK-LABEL: name: gpr_to_fp16 + ; CHECK: liveins: %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[COPY1]] + ; CHECK: [[COPY3:%[0-9]+]]:fpr16 = COPY [[COPY2]].hsub + ; CHECK: [[COPY4:%[0-9]+]]:fpr16 = COPY [[COPY3]] + ; CHECK: %h0 = COPY [[COPY4]] + ; CHECK: RET_ReallyLR implicit %h0 + %0:gpr(s32) = COPY %w0 + %1:gpr(s16) = G_TRUNC %0(s32) + %2:fpr(s16) = COPY %1(s16) + %h0 = COPY %2(s16) + RET_ReallyLR implicit %h0 + +... +--- +name: gpr_to_fp16_physreg +alignment: 2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } +body: | + bb.1 (%ir-block.0): + liveins: %w0 + + ; CHECK-LABEL: name: gpr_to_fp16_physreg + ; CHECK: liveins: %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[COPY1]] + ; CHECK: [[COPY3:%[0-9]+]]:fpr16 = COPY [[COPY2]].hsub + ; CHECK: %h0 = COPY [[COPY3]] + ; CHECK: RET_ReallyLR implicit %h0 + %0:gpr(s32) = COPY %w0 + %1:gpr(s16) = G_TRUNC %0(s32) + %h0 = COPY %1(s16) + RET_ReallyLR implicit %h0 + +... diff --git a/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir b/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir index 33b483511065..1980048eb456 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir @@ -1,8 +1,8 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- -# CHECK-LABEL: name: insert_gprs -name: insert_gprs +name: insert_gprx legalized: true regBankSelected: true @@ -10,26 +10,56 @@ body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: insert_gprx + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32 + ; CHECK: [[BFMXri:%[0-9]+]]:gpr64 = BFMXri [[DEF]], [[SUBREG_TO_REG]], 0, 31 + ; CHECK: [[SUBREG_TO_REG1:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32 + ; CHECK: [[BFMXri1:%[0-9]+]]:gpr64 = BFMXri [[DEF]], [[SUBREG_TO_REG1]], 51, 31 + ; CHECK: %x0 = COPY [[BFMXri]] + ; CHECK: %x1 = COPY [[BFMXri1]] %0:gpr(s32) = COPY %w0 %1:gpr(s64) = G_IMPLICIT_DEF - ; CHECK: body: - ; CHECK: [[TMP:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32 - ; CHECK: %2:gpr64 = BFMXri %1, [[TMP]], 0, 31 %2:gpr(s64) = G_INSERT %1, %0, 0 - ; CHECK: [[TMP:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32 - ; CHECK: %3:gpr64 = BFMXri %1, [[TMP]], 51, 31 %3:gpr(s64) = G_INSERT %1, %0, 13 %x0 = COPY %2 %x1 = COPY %3 ... +--- +name: insert_gprw +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %w0, %w1 + ; CHECK-LABEL: name: insert_gprw + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF + ; CHECK: [[BFMWri:%[0-9]+]]:gpr32 = BFMWri [[DEF]], [[COPY1]], 0, 15 + ; CHECK: [[BFMWri1:%[0-9]+]]:gpr32 = BFMWri [[BFMWri]], [[COPY2]], 16, 15 + ; CHECK: [[COPY3:%[0-9]+]]:gpr32all = COPY [[BFMWri1]] + ; CHECK: %w0 = COPY [[COPY3]] + %1:gpr(s32) = COPY %w0 + %2:gpr(s32) = COPY %w1 + %3:gpr(s16) = G_TRUNC %1(s32) + %4:gpr(s16) = G_TRUNC %1(s32) + %5:gpr(s32) = G_IMPLICIT_DEF + %6:gpr(s32) = G_INSERT %5, %3(s16), 0 + %7:gpr(s32) = G_INSERT %6, %4(s16), 16 + %0:gpr(s32) = COPY %7(s32) + %w0 = COPY %0 +... --- -# CHECK-LABEL: name: extract_gprs name: extract_gprs legalized: true regBankSelected: true @@ -38,17 +68,49 @@ body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: extract_gprs + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[COPY]], 0, 31 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[UBFMXri]].sub_32 + ; CHECK: [[UBFMXri1:%[0-9]+]]:gpr64 = UBFMXri [[COPY]], 13, 44 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[UBFMXri1]].sub_32 + ; CHECK: %w0 = COPY [[COPY1]] + ; CHECK: %w1 = COPY [[COPY2]] %0:gpr(s64) = COPY %x0 - ; CHECK: body: - ; CHECK: [[TMP:%[0-9]+]]:gpr64 = UBFMXri %0, 0, 31 - ; CHECK: %1:gpr32 = COPY [[TMP]].sub_32 %1:gpr(s32) = G_EXTRACT %0, 0 - ; CHECK: [[TMP:%[0-9]+]]:gpr64 = UBFMXri %0, 13, 44 - ; CHECK: %2:gpr32 = COPY [[TMP]].sub_32 %2:gpr(s32) = G_EXTRACT %0, 13 %w0 = COPY %1 %w1 = COPY %2 ... + +--- +name: extract_gprw +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: %w0 + + ; CHECK-LABEL: name: extract_gprw + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 0, 15 + ; CHECK: [[UBFMWri1:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 15, 30 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[UBFMWri]] + ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub + ; CHECK: %h0 = COPY [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[UBFMWri1]] + ; CHECK: [[COPY4:%[0-9]+]]:fpr16 = COPY [[COPY3]].hsub + ; CHECK: %h1 = COPY [[COPY4]] + %0:gpr(s32) = COPY %w0 + + %1:gpr(s16) = G_EXTRACT %0, 0 + + %2:gpr(s16) = G_EXTRACT %0, 15 + + %h0 = COPY %1 + %h1 = COPY %2 +... diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll index 420c7b80b8d3..adf22323ae65 100644 --- a/test/CodeGen/AMDGPU/smrd.ll +++ b/test/CodeGen/AMDGPU/smrd.ll @@ -261,8 +261,42 @@ main_body: ret void } +; GCN-LABEL: {{^}}smrd_sgpr_descriptor_promoted +; GCN: v_readfirstlane +define amdgpu_cs void @smrd_sgpr_descriptor_promoted([0 x i8] addrspace(2)* inreg noalias dereferenceable(18446744073709551615), i32) #0 { +main_body: + %descptr = bitcast [0 x i8] addrspace(2)* %0 to <4 x i32> addrspace(2)*, !amdgpu.uniform !0 + br label %.outer_loop_header + +ret_block: ; preds = %.outer, %.label22, %main_body + ret void + +.outer_loop_header: + br label %.inner_loop_header + +.inner_loop_header: ; preds = %.inner_loop_body, %.outer_loop_header + %loopctr.1 = phi i32 [ 0, %.outer_loop_header ], [ %loopctr.2, %.inner_loop_body ] + %loopctr.2 = add i32 %loopctr.1, 1 + %inner_br1 = icmp slt i32 %loopctr.2, 10 + br i1 %inner_br1, label %.inner_loop_body, label %ret_block + +.inner_loop_body: + %descriptor = load <4 x i32>, <4 x i32> addrspace(2)* %descptr, align 16, !invariant.load !0 + %load1result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 0) + %inner_br2 = icmp uge i32 %1, 10 + br i1 %inner_br2, label %.inner_loop_header, label %.outer_loop_body + +.outer_loop_body: + %offset = shl i32 %loopctr.2, 6 + %load2result = call float @llvm.SI.load.const.v4i32(<4 x i32> %descriptor, i32 %offset) + %outer_br = fcmp ueq float %load2result, 0x0 + br i1 %outer_br, label %.outer_loop_header, label %ret_block +} + declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } + +!0 = !{} diff --git a/test/CodeGen/PowerPC/pr36292.ll b/test/CodeGen/PowerPC/pr36292.ll new file mode 100644 index 000000000000..a171918b9e07 --- /dev/null +++ b/test/CodeGen/PowerPC/pr36292.ll @@ -0,0 +1,46 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown < %s | \ +; RUN: FileCheck %s --implicit-check-not=mtctr --implicit-check-not=bdnz +$test = comdat any + +; No CTR loop due to frem (since it is always a call). +define void @test() #0 comdat { +; CHECK-LABEL: test: +; CHECK: ld 29, 0(3) +; CHECK: ld 30, 40(1) +; CHECK: xxlxor 31, 31, 31 +; CHECK: cmpld 30, 29 +; CHECK-NEXT: bge- 0, .LBB0_2 +; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: .LBB0_1: # %bounds.ok +; CHECK: fmr 1, 31 +; CHECK-NEXT: lfsx 2, 0, 3 +; CHECK-NEXT: bl fmodf +; CHECK-NEXT: nop +; CHECK-NEXT: addi 30, 30, 1 +; CHECK-NEXT: stfsx 1, 0, 3 +; CHECK-NEXT: cmpld 30, 29 +; CHECK-NEXT: blt+ 0, .LBB0_1 +; CHECK-NEXT: .LBB0_2: # %bounds.fail +; CHECK-NEXT: std 30, 40(1) + %pos = alloca i64, align 8 + br label %forcond + +forcond: ; preds = %bounds.ok, %0 + %1 = load i64, i64* %pos + %.len1 = load i64, i64* undef + %bounds.cmp = icmp ult i64 %1, %.len1 + br i1 %bounds.cmp, label %bounds.ok, label %bounds.fail + +bounds.ok: ; preds = %forcond + %2 = load float, float* undef + %3 = frem float 0.000000e+00, %2 + store float %3, float* undef + %4 = load i64, i64* %pos + %5 = add i64 %4, 1 + store i64 %5, i64* %pos + br label %forcond + +bounds.fail: ; preds = %forcond + unreachable +} + diff --git a/test/CodeGen/X86/clwb.ll b/test/CodeGen/X86/clwb.ll index 0bbb14917f7f..e5906c6ce68c 100644 --- a/test/CodeGen/X86/clwb.ll +++ b/test/CodeGen/X86/clwb.ll @@ -1,5 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: clwb is available in Skylake Server, not available in the newer +; NOTE: Cannon Lake arch, but available again in the newer Ice Lake arch. ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=clwb | FileCheck %s +; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=skx | FileCheck %s +; RUN: not llc < %s -mtriple=i686-apple-darwin -mcpu=cannonlake 2>&1 | FileCheck %s --check-prefix=CNL +; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=icelake | FileCheck %s + +; CNL: LLVM ERROR: Cannot select: intrinsic %llvm.x86.clwb define void @clwb(i8* %p) nounwind { ; CHECK-LABEL: clwb: diff --git a/test/Transforms/InstCombine/pr36362.ll b/test/Transforms/InstCombine/pr36362.ll new file mode 100644 index 000000000000..412691543a15 --- /dev/null +++ b/test/Transforms/InstCombine/pr36362.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +;RUN: opt -instcombine -S %s | FileCheck %s + +; We shouldn't remove the select before the srem +define i32 @foo(i1 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[A:%.*]], i32 [[B:%.*]], i32 -1 +; CHECK-NEXT: [[REM:%.*]] = srem i32 [[C:%.*]], [[SEL1]] +; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[A]], i32 [[REM]], i32 0 +; CHECK-NEXT: ret i32 [[SEL2]] +; + %sel1 = select i1 %a, i32 %b, i32 -1 + %rem = srem i32 %c, %sel1 + %sel2 = select i1 %a, i32 %rem, i32 0 + ret i32 %sel2 +} + diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll index b28eea0bc2aa..6d747877c58e 100644 --- a/test/Transforms/LICM/sinking.ll +++ b/test/Transforms/LICM/sinking.ll @@ -670,6 +670,67 @@ try.cont: ret void } +; The sinkable call should be sunk into an exit block split. After splitting +; the exit block, BlockColor for new blocks should be added properly so +; that we should be able to access valid ColorVector. +; +; CHECK-LABEL:@test21_pr36184 +; CHECK-LABEL: Loop +; CHECK-NOT: %sinkableCall +; CHECK-LABEL:Out.split.loop.exit +; CHECK: %sinkableCall +define i32 @test21_pr36184(i8* %P) personality i32 (...)* @__CxxFrameHandler3 { +entry: + br label %loop.ph + +loop.ph: + br label %Loop + +Loop: + %sinkableCall = call i32 @strlen( i8* %P ) readonly + br i1 undef, label %ContLoop, label %Out + +ContLoop: + br i1 undef, label %Loop, label %Out + +Out: + %idx = phi i32 [ %sinkableCall, %Loop ], [0, %ContLoop ] + ret i32 %idx +} + +; We do not support splitting a landingpad block if BlockColors is not empty. +; CHECK-LABEL: @test22 +; CHECK-LABEL: while.body2 +; CHECK-LABEL: %mul +; CHECK-NOT: lpadBB.split{{.*}} +define void @test22(i1 %b, i32 %v1, i32 %v2) personality i32 (...)* @__CxxFrameHandler3 { +entry: + br label %while.cond +while.cond: + br i1 %b, label %try.cont, label %while.body + +while.body: + invoke void @may_throw() + to label %while.body2 unwind label %lpadBB + +while.body2: + %v = call i32 @getv() + %mul = mul i32 %v, %v2 + invoke void @may_throw2() + to label %while.cond unwind label %lpadBB +lpadBB: + %.lcssa1 = phi i32 [ 0, %while.body ], [ %mul, %while.body2 ] + landingpad { i8*, i32 } + catch i8* null + br label %lpadBBSucc1 + +lpadBBSucc1: + ret void + +try.cont: + ret void +} + declare void @may_throw() declare void @may_throw2() declare i32 @__CxxFrameHandler3(...) diff --git a/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll index d9c9632be047..08d163fe6299 100644 --- a/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll +++ b/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -scev-version-unknown < %s 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/Transforms/LoopVectorize/pr35773.ll b/test/Transforms/LoopVectorize/pr35773.ll index 362ece70b898..308bb393cc4e 100644 --- a/test/Transforms/LoopVectorize/pr35773.ll +++ b/test/Transforms/LoopVectorize/pr35773.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s 2>&1 | FileCheck %s +; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -scev-version-unknown < %s 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @a = common local_unnamed_addr global i32 0, align 4 @b = common local_unnamed_addr global i8 0, align 1 diff --git a/test/Transforms/LoopVectorize/reduction-small-size.ll b/test/Transforms/LoopVectorize/reduction-small-size.ll index b44beb8ce68f..879f1c3c5ad4 100644 --- a/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -14,7 +14,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-NEXT: [[TMP17]] = zext <4 x i8> [[TMP16]] to <4 x i32> ; CHECK-NEXT: br i1 {{.*}}, label %middle.block, label %vector.body ; -define void @PR34687(i1 %c, i32 %x, i32 %n) { +define i8 @PR34687(i1 %c, i32 %x, i32 %n) { entry: br label %for.body @@ -36,5 +36,38 @@ if.end: for.end: %tmp2 = phi i32 [ %r.next, %if.end ] - ret void + %tmp3 = trunc i32 %tmp2 to i8 + ret i8 %tmp3 +} + +; CHECK-LABEL: @PR35734( +; CHECK: vector.ph: +; CHECK: [[TMP3:%.*]] = insertelement <4 x i32> zeroinitializer, i32 %y, i32 0 +; CHECK-NEXT: br label %vector.body +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP3]], %vector.ph ], [ [[TMP9:%.*]], %vector.body ] +; CHECK: [[TMP5:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 -1, i32 -1, i32 -1, i32 -1> +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK: [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i1> +; CHECK-NEXT: [[TMP9]] = sext <4 x i1> [[TMP8]] to <4 x i32> +; CHECK-NEXT: br i1 {{.*}}, label %middle.block, label %vector.body +; +define i32 @PR35734(i32 %x, i32 %y) { +entry: + br label %for.body + +for.body: + %i = phi i32 [ %x, %entry ], [ %i.next, %for.body ] + %r = phi i32 [ %y, %entry ], [ %r.next, %for.body ] + %tmp0 = and i32 %r, 1 + %r.next = add i32 %tmp0, -1 + %i.next = add nsw i32 %i, 1 + %cond = icmp sgt i32 %i, 77 + br i1 %cond, label %for.end, label %for.body + +for.end: + %tmp1 = phi i32 [ %r.next, %for.body ] + ret i32 %tmp1 } diff --git a/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll b/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll index 4ddc6a652179..f7877245b0d4 100644 --- a/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll +++ b/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8 -; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1 +; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -scev-version-unknown < %s | FileCheck %s -check-prefix=VF8 +; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -scev-version-unknown < %s | FileCheck %s -check-prefix=VF1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/test/tools/llvm-config/system-libs.windows.test b/test/tools/llvm-config/system-libs.windows.test index 2c6e03afa2d9..09970cf68994 100644 --- a/test/tools/llvm-config/system-libs.windows.test +++ b/test/tools/llvm-config/system-libs.windows.test @@ -2,6 +2,6 @@ RUN: llvm-config --link-static --system-libs 2>&1 | FileCheck %s REQUIRES: static-libs REQUIRES: system-windows CHECK-NOT: -l -CHECK: psapi.lib shell32.lib ole32.lib uuid.lib +CHECK: psapi.lib shell32.lib ole32.lib uuid.lib advapi32.lib CHECK-NOT: error CHECK-NOT: warning |
