diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2018-02-02 17:07:53 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2018-02-02 17:07:53 +0000 |
| commit | 6d18171c1901a4db5d3e757a5ba4737fe8789dec (patch) | |
| tree | 6adfbc90504e1005368a826374523b46773e1599 /test/CodeGen | |
| parent | 4a6a1ccbecd7e34f40b05b4ba0a05d0031dd1eff (diff) | |
Notes
Diffstat (limited to 'test/CodeGen')
| -rw-r--r-- | test/CodeGen/AMDGPU/smrd.ll | 18 | ||||
| -rw-r--r-- | test/CodeGen/Mips/pr36061.ll | 65 | ||||
| -rw-r--r-- | test/CodeGen/Mips/unsized-global.ll | 22 | ||||
| -rw-r--r-- | test/CodeGen/SPARC/stack-align.ll | 16 | ||||
| -rw-r--r-- | test/CodeGen/Thumb/pr35836.ll | 56 | ||||
| -rw-r--r-- | test/CodeGen/Thumb/pr35836_2.ll | 57 | ||||
| -rw-r--r-- | test/CodeGen/X86/O0-pipeline.ll | 2 | ||||
| -rw-r--r-- | test/CodeGen/X86/retpoline-external.ll | 166 | ||||
| -rw-r--r-- | test/CodeGen/X86/retpoline.ll | 367 |
9 files changed, 749 insertions, 20 deletions
diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll index 9fd20fd67b8c..420c7b80b8d3 100644 --- a/test/CodeGen/AMDGPU/smrd.ll +++ b/test/CodeGen/AMDGPU/smrd.ll @@ -194,11 +194,7 @@ main_body: ; GCN-LABEL: {{^}}smrd_vgpr_offset_imm: ; GCN-NEXT: %bb. - -; SICIVI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ; - -; GFX9-NEXT: v_add_u32_e32 [[ADD:v[0-9]+]], 0xfff, v0 -; GFX9-NEXT: buffer_load_dword v{{[0-9]}}, [[ADD]], s[0:3], 0 offen ; +; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ; define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 { main_body: %off = add i32 %offset, 4095 @@ -244,16 +240,8 @@ main_body: ; GCN-LABEL: {{^}}smrd_vgpr_merged: ; GCN-NEXT: %bb. - -; SICIVI-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4 -; SICIVI-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 - -; GFX9: buffer_load_dword -; GFX9: buffer_load_dword -; GFX9: buffer_load_dword -; GFX9: buffer_load_dword -; GFX9: buffer_load_dword -; GFX9: buffer_load_dword +; GCN-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4 +; GCN-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28 define amdgpu_ps void @smrd_vgpr_merged(<4 x i32> inreg %desc, i32 %a) #0 { main_body: %a1 = add i32 %a, 4 diff --git a/test/CodeGen/Mips/pr36061.ll b/test/CodeGen/Mips/pr36061.ll new file mode 100644 index 000000000000..6a9aa72aae0e --- /dev/null +++ b/test/CodeGen/Mips/pr36061.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -target-abi n64 | FileCheck %s --check-prefix=MIPSN64 +; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -target-abi n32 | FileCheck %s --check-prefix=MIPSN32 + +; Test that powi has its integer argument sign extended on mips64. + +declare double @llvm.powi.f64(double, i32) + +define double @powi(double %value, i32 %power) { +; MIPSN64-LABEL: powi: +; MIPSN64: # %bb.0: +; MIPSN64-NEXT: daddiu $sp, $sp, -16 +; MIPSN64-NEXT: .cfi_def_cfa_offset 16 +; MIPSN64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPSN64-NEXT: .cfi_offset 31, -8 +; MIPSN64-NEXT: jal __powidf2 +; MIPSN64-NEXT: sll $5, $5, 0 +; MIPSN64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPSN64-NEXT: jr $ra +; MIPSN64-NEXT: daddiu $sp, $sp, 16 +; +; MIPSN32-LABEL: powi: +; MIPSN32: # %bb.0: +; MIPSN32-NEXT: addiu $sp, $sp, -16 +; MIPSN32-NEXT: .cfi_def_cfa_offset 16 +; MIPSN32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPSN32-NEXT: .cfi_offset 31, -8 +; MIPSN32-NEXT: jal __powidf2 +; MIPSN32-NEXT: sll $5, $5, 0 +; MIPSN32-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPSN32-NEXT: jr $ra +; MIPSN32-NEXT: addiu $sp, $sp, 16 + %1 = tail call double @llvm.powi.f64(double %value, i32 %power) + ret double %1 +} + +declare float @llvm.powi.f32(float, i32) + +define float @powfi(float %value, i32 %power) { +; MIPSN64-LABEL: powfi: +; MIPSN64: # %bb.0: +; MIPSN64-NEXT: daddiu $sp, $sp, -16 +; MIPSN64-NEXT: .cfi_def_cfa_offset 16 +; MIPSN64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPSN64-NEXT: .cfi_offset 31, -8 +; MIPSN64-NEXT: jal __powisf2 +; MIPSN64-NEXT: sll $5, $5, 0 +; MIPSN64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPSN64-NEXT: jr $ra +; MIPSN64-NEXT: daddiu $sp, $sp, 16 +; +; MIPSN32-LABEL: powfi: +; MIPSN32: # %bb.0: +; MIPSN32-NEXT: addiu $sp, $sp, -16 +; MIPSN32-NEXT: .cfi_def_cfa_offset 16 +; MIPSN32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPSN32-NEXT: .cfi_offset 31, -8 +; MIPSN32-NEXT: jal __powisf2 +; MIPSN32-NEXT: sll $5, $5, 0 +; MIPSN32-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPSN32-NEXT: jr $ra +; MIPSN32-NEXT: addiu $sp, $sp, 16 + %1 = tail call float @llvm.powi.f32(float %value, i32 %power) + ret float %1 +} diff --git a/test/CodeGen/Mips/unsized-global.ll b/test/CodeGen/Mips/unsized-global.ll new file mode 100644 index 000000000000..a89ecc1fd1cb --- /dev/null +++ b/test/CodeGen/Mips/unsized-global.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Check that -mgpopt doesn't crash on unsized externals +; RUN: llc -mtriple=mips64-unknown-freebsd -mattr=+noabicalls -target-abi n64 -mgpopt -o - %s | FileCheck %s + +%struct.a = type opaque + +@b = external global %struct.a, align 1 + +; Function Attrs: norecurse nounwind readnone +define %struct.a* @d() { +; CHECK-LABEL: d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui $1, %highest(b) +; CHECK-NEXT: daddiu $1, $1, %higher(b) +; CHECK-NEXT: dsll $1, $1, 16 +; CHECK-NEXT: daddiu $1, $1, %hi(b) +; CHECK-NEXT: dsll $1, $1, 16 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: daddiu $2, $1, %lo(b) +entry: + ret %struct.a* @b +} diff --git a/test/CodeGen/SPARC/stack-align.ll b/test/CodeGen/SPARC/stack-align.ll index b152e6a038f5..6516fb78e48b 100644 --- a/test/CodeGen/SPARC/stack-align.ll +++ b/test/CodeGen/SPARC/stack-align.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=sparc < %s | FileCheck %s +; RUN: llc -march=sparc < %s | FileCheck %s --check-prefixes=CHECK,CHECK32 +; RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK64 declare void @stack_realign_helper(i32 %a, i32* %b) ;; This is a function where we have a local variable of 64-byte @@ -7,10 +8,15 @@ declare void @stack_realign_helper(i32 %a, i32* %b) ;; the argument is accessed via frame pointer not stack pointer (to %o0). ;; CHECK-LABEL: stack_realign: -;; CHECK: andn %sp, 63, %sp -;; CHECK-NEXT: ld [%fp+92], %o0 -;; CHECK-NEXT: call stack_realign_helper -;; CHECK-NEXT: add %sp, 128, %o1 +;; CHECK32: andn %sp, 63, %sp +;; CHECK32-NEXT: ld [%fp+92], %o0 +;; CHECK64: add %sp, 2047, %g1 +;; CHECK64-NEXT: andn %g1, 63, %g1 +;; CHECK64-NEXT: add %g1, -2047, %sp +;; CHECK64-NEXT: ld [%fp+2227], %o0 +;; CHECK-NEXT: call stack_realign_helper +;; CHECK32-NEXT: add %sp, 128, %o1 +;; CHECK64-NEXT: add %sp, 2239, %o1 define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) { entry: diff --git a/test/CodeGen/Thumb/pr35836.ll b/test/CodeGen/Thumb/pr35836.ll new file mode 100644 index 000000000000..7765e66658a0 --- /dev/null +++ b/test/CodeGen/Thumb/pr35836.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv5e-none-linux-gnueabi" + +; Function Attrs: norecurse nounwind optsize +define void @f(i32,i32,i32,i32,i32* %x4p, i32* %x5p, i32* %x6p) { +if.end: + br label %while.body + +while.body: + %ll.0100 = phi i64 [ 0, %if.end ], [ %shr32, %while.body ] + %add = add nuw nsw i64 %ll.0100, 0 + %add3 = add nuw nsw i64 %add, 0 + %shr = lshr i64 %add3, 32 + %conv7 = zext i32 %0 to i64 + %conv9 = zext i32 %1 to i64 + %add10 = add nuw nsw i64 %conv9, %conv7 + %add11 = add nuw nsw i64 %add10, %shr + %shr14 = lshr i64 %add11, 32 + %conv16 = zext i32 %2 to i64 + %conv18 = zext i32 %3 to i64 + %add19 = add nuw nsw i64 %conv18, %conv16 + %add20 = add nuw nsw i64 %add19, %shr14 + %conv21 = trunc i64 %add20 to i32 + store i32 %conv21, i32* %x6p, align 4 + %shr23 = lshr i64 %add20, 32 + %x4 = load i32, i32* %x4p, align 4 + %conv25 = zext i32 %x4 to i64 + %x5 = load i32, i32* %x5p, align 4 + %conv27 = zext i32 %x5 to i64 + %add28 = add nuw nsw i64 %conv27, %conv25 + %add29 = add nuw nsw i64 %add28, %shr23 + %shr32 = lshr i64 %add29, 32 + br label %while.body +} +; CHECK: adds r3, r0, r1 +; CHECK: push {r5} +; CHECK: pop {r1} +; CHECK: adcs r1, r1 +; CHECK: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK: adds r2, r0, r2 +; CHECK: push {r5} +; CHECK: pop {r4} +; CHECK: adcs r4, r4 +; CHECK: adds r0, r2, r5 +; CHECK: push {r3} +; CHECK: pop {r0} +; CHECK: adcs r0, r4 +; CHECK: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK: str r0, [r6] +; CHECK: ldr r0, [r7] +; CHECK: ldr r6, [sp] @ 4-byte Reload +; CHECK: ldr r6, [r6] +; CHECK: adds r0, r6, r0 diff --git a/test/CodeGen/Thumb/pr35836_2.ll b/test/CodeGen/Thumb/pr35836_2.ll new file mode 100644 index 000000000000..af115e8ce21a --- /dev/null +++ b/test/CodeGen/Thumb/pr35836_2.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:64:64-i128:64-v128:64:128-a:0:64-n64-S64" +target triple = "thumbv6---gnueabi" + +; Function Attrs: norecurse nounwind readonly +define i128 @a(i64* nocapture readonly %z) local_unnamed_addr #0 { +entry: + %0 = load i64, i64* %z, align 4 + %conv.i = zext i64 %0 to i128 + %arrayidx1 = getelementptr inbounds i64, i64* %z, i64 2 + %1 = load i64, i64* %arrayidx1, align 4 + %conv.i38 = zext i64 %1 to i128 + %shl.i39 = shl nuw i128 %conv.i38, 64 + %or = or i128 %shl.i39, %conv.i + %arrayidx3 = getelementptr inbounds i64, i64* %z, i64 1 + %2 = load i64, i64* %arrayidx3, align 4 + %conv.i37 = zext i64 %2 to i128 + %arrayidx5 = getelementptr inbounds i64, i64* %z, i64 3 + %3 = load i64, i64* %arrayidx5, align 4 + %conv.i35 = zext i64 %3 to i128 + %shl.i36 = shl nuw i128 %conv.i35, 64 + %or7 = or i128 %shl.i36, %conv.i37 + %arrayidx10 = getelementptr inbounds i64, i64* %z, i64 4 + %4 = load i64, i64* %arrayidx10, align 4 + %conv.i64 = zext i64 %4 to i128 + %shl.i33 = shl nuw i128 %conv.i64, 64 + %or12 = or i128 %shl.i33, %conv.i + %arrayidx15 = getelementptr inbounds i64, i64* %z, i64 5 + %5 = load i64, i64* %arrayidx15, align 4 + %conv.i30 = zext i64 %5 to i128 + %shl.i = shl nuw i128 %conv.i30, 64 + %or17 = or i128 %shl.i, %conv.i37 + %add = add i128 %or7, %or + %add18 = add i128 %or17, %or12 + %mul = mul i128 %add18, %add + ret i128 %mul +} +; CHECK: adds r4, r2, r7 +; CHECK: mov r4, r1 +; CHECK: adcs r4, r6 +; CHECK: ldr r4, [sp, #20] @ 4-byte Reload +; CHECK: adcs r5, r4 +; CHECK: ldr r4, [sp, #24] @ 4-byte Reload +; CHECK: adcs r3, r4 +; CHECK: adds r4, r2, r7 +; CHECK: adcs r1, r6 +; CHECK: mov r2, sp +; CHECK: str r4, [r2] +; CHECK: str r1, [r2, #4] +; CHECK: ldr r6, [r0, #16] +; CHECK: ldr r7, [r0, #24] +; CHECK: adcs r7, r6 +; CHECK: str r7, [r2, #8] +; CHECK: ldr r6, [r0, #20] +; CHECK: ldr r0, [r0, #28] +; CHECK: adcs r0, r6 diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll index cb7dabefe45a..3a720a5288a2 100644 --- a/test/CodeGen/X86/O0-pipeline.ll +++ b/test/CodeGen/X86/O0-pipeline.ll @@ -25,6 +25,7 @@ ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining) ; CHECK-NEXT: Scalarize Masked Memory Intrinsics ; CHECK-NEXT: Expand reduction intrinsics +; CHECK-NEXT: Expand indirectbr instructions ; CHECK-NEXT: Rewrite Symbols ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction @@ -57,6 +58,7 @@ ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Insert XRay ops ; CHECK-NEXT: Implement the 'patchable-function' attribute +; CHECK-NEXT: X86 Retpoline Thunks ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: MachineDominator Tree Construction diff --git a/test/CodeGen/X86/retpoline-external.ll b/test/CodeGen/X86/retpoline-external.ll new file mode 100644 index 000000000000..66d32ba5d73d --- /dev/null +++ b/test/CodeGen/X86/retpoline-external.ll @@ -0,0 +1,166 @@ +; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64 +; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST + +; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86 +; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST + +declare void @bar(i32) + +; Test a simple indirect call and tail call. +define void @icall_reg(void (i32)* %fp, i32 %x) #0 { +entry: + tail call void @bar(i32 %x) + tail call void %fp(i32 %x) + tail call void @bar(i32 %x) + tail call void %fp(i32 %x) + ret void +} + +; X64-LABEL: icall_reg: +; X64-DAG: movq %rdi, %[[fp:[^ ]*]] +; X64-DAG: movl %esi, %[[x:[^ ]*]] +; X64: movl %[[x]], %edi +; X64: callq bar +; X64-DAG: movl %[[x]], %edi +; X64-DAG: movq %[[fp]], %r11 +; X64: callq __llvm_external_retpoline_r11 +; X64: movl %[[x]], %edi +; X64: callq bar +; X64-DAG: movl %[[x]], %edi +; X64-DAG: movq %[[fp]], %r11 +; X64: jmp __llvm_external_retpoline_r11 # TAILCALL + +; X64FAST-LABEL: icall_reg: +; X64FAST: callq bar +; X64FAST: callq __llvm_external_retpoline_r11 +; X64FAST: callq bar +; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL + +; X86-LABEL: icall_reg: +; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]] +; X86-DAG: movl 16(%esp), %[[x:[^ ]*]] +; X86: pushl %[[x]] +; X86: calll bar +; X86: movl %[[fp]], %eax +; X86: pushl %[[x]] +; X86: calll __llvm_external_retpoline_eax +; X86: pushl %[[x]] +; X86: calll bar +; X86: movl %[[fp]], %eax +; X86: pushl %[[x]] +; X86: calll __llvm_external_retpoline_eax +; X86-NOT: # TAILCALL + +; X86FAST-LABEL: icall_reg: +; X86FAST: calll bar +; X86FAST: calll __llvm_external_retpoline_eax +; X86FAST: calll bar +; X86FAST: calll __llvm_external_retpoline_eax + + +@global_fp = external global void (i32)* + +; Test an indirect call through a global variable. +define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 { + %fp1 = load void (i32)*, void (i32)** @global_fp + call void %fp1(i32 %x) + %fp2 = load void (i32)*, void (i32)** @global_fp + tail call void %fp2(i32 %x) + ret void +} + +; X64-LABEL: icall_global_fp: +; X64-DAG: movl %edi, %[[x:[^ ]*]] +; X64-DAG: movq global_fp(%rip), %r11 +; X64: callq __llvm_external_retpoline_r11 +; X64-DAG: movl %[[x]], %edi +; X64-DAG: movq global_fp(%rip), %r11 +; X64: jmp __llvm_external_retpoline_r11 # TAILCALL + +; X64FAST-LABEL: icall_global_fp: +; X64FAST: movq global_fp(%rip), %r11 +; X64FAST: callq __llvm_external_retpoline_r11 +; X64FAST: movq global_fp(%rip), %r11 +; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL + +; X86-LABEL: icall_global_fp: +; X86: movl global_fp, %eax +; X86: pushl 4(%esp) +; X86: calll __llvm_external_retpoline_eax +; X86: addl $4, %esp +; X86: movl global_fp, %eax +; X86: jmp __llvm_external_retpoline_eax # TAILCALL + +; X86FAST-LABEL: icall_global_fp: +; X86FAST: calll __llvm_external_retpoline_eax +; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL + + +%struct.Foo = type { void (%struct.Foo*)** } + +; Test an indirect call through a vtable. +define void @vcall(%struct.Foo* %obj) #0 { + %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0 + %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field + %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1 + %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot + tail call void %fp(%struct.Foo* %obj) + tail call void %fp(%struct.Foo* %obj) + ret void +} + +; X64-LABEL: vcall: +; X64: movq %rdi, %[[obj:[^ ]*]] +; X64: movq (%[[obj]]), %[[vptr:[^ ]*]] +; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]] +; X64: movq %[[fp]], %r11 +; X64: callq __llvm_external_retpoline_r11 +; X64-DAG: movq %[[obj]], %rdi +; X64-DAG: movq %[[fp]], %r11 +; X64: jmp __llvm_external_retpoline_r11 # TAILCALL + +; X64FAST-LABEL: vcall: +; X64FAST: callq __llvm_external_retpoline_r11 +; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL + +; X86-LABEL: vcall: +; X86: movl 8(%esp), %[[obj:[^ ]*]] +; X86: movl (%[[obj]]), %[[vptr:[^ ]*]] +; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]] +; X86: movl %[[fp]], %eax +; X86: pushl %[[obj]] +; X86: calll __llvm_external_retpoline_eax +; X86: addl $4, %esp +; X86: movl %[[fp]], %eax +; X86: jmp __llvm_external_retpoline_eax # TAILCALL + +; X86FAST-LABEL: vcall: +; X86FAST: calll __llvm_external_retpoline_eax +; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL + + +declare void @direct_callee() + +define void @direct_tail() #0 { + tail call void @direct_callee() + ret void +} + +; X64-LABEL: direct_tail: +; X64: jmp direct_callee # TAILCALL +; X64FAST-LABEL: direct_tail: +; X64FAST: jmp direct_callee # TAILCALL +; X86-LABEL: direct_tail: +; X86: jmp direct_callee # TAILCALL +; X86FAST-LABEL: direct_tail: +; X86FAST: jmp direct_callee # TAILCALL + + +; Lastly check that no thunks were emitted. +; X64-NOT: __{{.*}}_retpoline_{{.*}}: +; X64FAST-NOT: __{{.*}}_retpoline_{{.*}}: +; X86-NOT: __{{.*}}_retpoline_{{.*}}: +; X86FAST-NOT: __{{.*}}_retpoline_{{.*}}: + + +attributes #0 = { "target-features"="+retpoline-external-thunk" } diff --git a/test/CodeGen/X86/retpoline.ll b/test/CodeGen/X86/retpoline.ll new file mode 100644 index 000000000000..57d3388b812a --- /dev/null +++ b/test/CodeGen/X86/retpoline.ll @@ -0,0 +1,367 @@ +; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64 +; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST + +; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86 +; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST + +declare void @bar(i32) + +; Test a simple indirect call and tail call. +define void @icall_reg(void (i32)* %fp, i32 %x) #0 { +entry: + tail call void @bar(i32 %x) + tail call void %fp(i32 %x) + tail call void @bar(i32 %x) + tail call void %fp(i32 %x) + ret void +} + +; X64-LABEL: icall_reg: +; X64-DAG: movq %rdi, %[[fp:[^ ]*]] +; X64-DAG: movl %esi, %[[x:[^ ]*]] +; X64: movl %[[x]], %edi +; X64: callq bar +; X64-DAG: movl %[[x]], %edi +; X64-DAG: movq %[[fp]], %r11 +; X64: callq __llvm_retpoline_r11 +; X64: movl %[[x]], %edi +; X64: callq bar +; X64-DAG: movl %[[x]], %edi +; X64-DAG: movq %[[fp]], %r11 +; X64: jmp __llvm_retpoline_r11 # TAILCALL + +; X64FAST-LABEL: icall_reg: +; X64FAST: callq bar +; X64FAST: callq __llvm_retpoline_r11 +; X64FAST: callq bar +; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL + +; X86-LABEL: icall_reg: +; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]] +; X86-DAG: movl 16(%esp), %[[x:[^ ]*]] +; X86: pushl %[[x]] +; X86: calll bar +; X86: movl %[[fp]], %eax +; X86: pushl %[[x]] +; X86: calll __llvm_retpoline_eax +; X86: pushl %[[x]] +; X86: calll bar +; X86: movl %[[fp]], %eax +; X86: pushl %[[x]] +; X86: calll __llvm_retpoline_eax +; X86-NOT: # TAILCALL + +; X86FAST-LABEL: icall_reg: +; X86FAST: calll bar +; X86FAST: calll __llvm_retpoline_eax +; X86FAST: calll bar +; X86FAST: calll __llvm_retpoline_eax + + +@global_fp = external global void (i32)* + +; Test an indirect call through a global variable. +define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 { + %fp1 = load void (i32)*, void (i32)** @global_fp + call void %fp1(i32 %x) + %fp2 = load void (i32)*, void (i32)** @global_fp + tail call void %fp2(i32 %x) + ret void +} + +; X64-LABEL: icall_global_fp: +; X64-DAG: movl %edi, %[[x:[^ ]*]] +; X64-DAG: movq global_fp(%rip), %r11 +; X64: callq __llvm_retpoline_r11 +; X64-DAG: movl %[[x]], %edi +; X64-DAG: movq global_fp(%rip), %r11 +; X64: jmp __llvm_retpoline_r11 # TAILCALL + +; X64FAST-LABEL: icall_global_fp: +; X64FAST: movq global_fp(%rip), %r11 +; X64FAST: callq __llvm_retpoline_r11 +; X64FAST: movq global_fp(%rip), %r11 +; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL + +; X86-LABEL: icall_global_fp: +; X86: movl global_fp, %eax +; X86: pushl 4(%esp) +; X86: calll __llvm_retpoline_eax +; X86: addl $4, %esp +; X86: movl global_fp, %eax +; X86: jmp __llvm_retpoline_eax # TAILCALL + +; X86FAST-LABEL: icall_global_fp: +; X86FAST: calll __llvm_retpoline_eax +; X86FAST: jmp __llvm_retpoline_eax # TAILCALL + + +%struct.Foo = type { void (%struct.Foo*)** } + +; Test an indirect call through a vtable. +define void @vcall(%struct.Foo* %obj) #0 { + %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0 + %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field + %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1 + %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot + tail call void %fp(%struct.Foo* %obj) + tail call void %fp(%struct.Foo* %obj) + ret void +} + +; X64-LABEL: vcall: +; X64: movq %rdi, %[[obj:[^ ]*]] +; X64: movq (%[[obj]]), %[[vptr:[^ ]*]] +; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]] +; X64: movq %[[fp]], %r11 +; X64: callq __llvm_retpoline_r11 +; X64-DAG: movq %[[obj]], %rdi +; X64-DAG: movq %[[fp]], %r11 +; X64: jmp __llvm_retpoline_r11 # TAILCALL + +; X64FAST-LABEL: vcall: +; X64FAST: callq __llvm_retpoline_r11 +; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL + +; X86-LABEL: vcall: +; X86: movl 8(%esp), %[[obj:[^ ]*]] +; X86: movl (%[[obj]]), %[[vptr:[^ ]*]] +; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]] +; X86: movl %[[fp]], %eax +; X86: pushl %[[obj]] +; X86: calll __llvm_retpoline_eax +; X86: addl $4, %esp +; X86: movl %[[fp]], %eax +; X86: jmp __llvm_retpoline_eax # TAILCALL + +; X86FAST-LABEL: vcall: +; X86FAST: calll __llvm_retpoline_eax +; X86FAST: jmp __llvm_retpoline_eax # TAILCALL + + +declare void @direct_callee() + +define void @direct_tail() #0 { + tail call void @direct_callee() + ret void +} + +; X64-LABEL: direct_tail: +; X64: jmp direct_callee # TAILCALL +; X64FAST-LABEL: direct_tail: +; X64FAST: jmp direct_callee # TAILCALL +; X86-LABEL: direct_tail: +; X86: jmp direct_callee # TAILCALL +; X86FAST-LABEL: direct_tail: +; X86FAST: jmp direct_callee # TAILCALL + + +declare void @nonlazybind_callee() #1 + +define void @nonlazybind_caller() #0 { + call void @nonlazybind_callee() + tail call void @nonlazybind_callee() + ret void +} + +; X64-LABEL: nonlazybind_caller: +; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]] +; X64: movq %[[REG]], %r11 +; X64: callq __llvm_retpoline_r11 +; X64: movq %[[REG]], %r11 +; X64: jmp __llvm_retpoline_r11 # TAILCALL +; X64FAST-LABEL: nonlazybind_caller: +; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11 +; X64FAST: callq __llvm_retpoline_r11 +; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11 +; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL +; X86-LABEL: nonlazybind_caller: +; X86: calll nonlazybind_callee@PLT +; X86: jmp nonlazybind_callee@PLT # TAILCALL +; X86FAST-LABEL: nonlazybind_caller: +; X86FAST: calll nonlazybind_callee@PLT +; X86FAST: jmp nonlazybind_callee@PLT # TAILCALL + + +@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0), + i8* blockaddress(@indirectbr_rewrite, %bb1), + i8* blockaddress(@indirectbr_rewrite, %bb2), + i8* blockaddress(@indirectbr_rewrite, %bb3), + i8* blockaddress(@indirectbr_rewrite, %bb4), + i8* blockaddress(@indirectbr_rewrite, %bb5), + i8* blockaddress(@indirectbr_rewrite, %bb6), + i8* blockaddress(@indirectbr_rewrite, %bb7), + i8* blockaddress(@indirectbr_rewrite, %bb8), + i8* blockaddress(@indirectbr_rewrite, %bb9)] + +; Check that when retpolines are enabled a function with indirectbr gets +; rewritten to use switch, and that in turn doesn't get lowered as a jump +; table. +define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 { +; X64-LABEL: indirectbr_rewrite: +; X64-NOT: jmpq +; X86-LABEL: indirectbr_rewrite: +; X86-NOT: jmpl +entry: + %i0 = load i64, i64* %p + %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0 + %target0 = load i8*, i8** %target.i0 + indirectbr i8* %target0, [label %bb1, label %bb3] + +bb0: + store volatile i64 0, i64* %sink + br label %latch + +bb1: + store volatile i64 1, i64* %sink + br label %latch + +bb2: + store volatile i64 2, i64* %sink + br label %latch + +bb3: + store volatile i64 3, i64* %sink + br label %latch + +bb4: + store volatile i64 4, i64* %sink + br label %latch + +bb5: + store volatile i64 5, i64* %sink + br label %latch + +bb6: + store volatile i64 6, i64* %sink + br label %latch + +bb7: + store volatile i64 7, i64* %sink + br label %latch + +bb8: + store volatile i64 8, i64* %sink + br label %latch + +bb9: + store volatile i64 9, i64* %sink + br label %latch + +latch: + %i.next = load i64, i64* %p + %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next + %target.next = load i8*, i8** %target.i.next + ; Potentially hit a full 10 successors here so that even if we rewrite as + ; a switch it will try to be lowered with a jump table. + indirectbr i8* %target.next, [label %bb0, + label %bb1, + label %bb2, + label %bb3, + label %bb4, + label %bb5, + label %bb6, + label %bb7, + label %bb8, + label %bb9] +} + +; Lastly check that the necessary thunks were emitted. +; +; X64-LABEL: .section .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat +; X64-NEXT: .hidden __llvm_retpoline_r11 +; X64-NEXT: .weak __llvm_retpoline_r11 +; X64: __llvm_retpoline_r11: +; X64-NEXT: # {{.*}} # %entry +; X64-NEXT: callq [[CALL_TARGET:.*]] +; X64-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken +; X64-NEXT: # %entry +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: pause +; X64-NEXT: lfence +; X64-NEXT: jmp [[CAPTURE_SPEC]] +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: [[CALL_TARGET]]: # Block address taken +; X64-NEXT: # %entry +; X64-NEXT: movq %r11, (%rsp) +; X64-NEXT: retq +; +; X86-LABEL: .section .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat +; X86-NEXT: .hidden __llvm_retpoline_eax +; X86-NEXT: .weak __llvm_retpoline_eax +; X86: __llvm_retpoline_eax: +; X86-NEXT: # {{.*}} # %entry +; X86-NEXT: calll [[CALL_TARGET:.*]] +; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken +; X86-NEXT: # %entry +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: pause +; X86-NEXT: lfence +; X86-NEXT: jmp [[CAPTURE_SPEC]] +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: [[CALL_TARGET]]: # Block address taken +; X86-NEXT: # %entry +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: retl +; +; X86-LABEL: .section .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat +; X86-NEXT: .hidden __llvm_retpoline_ecx +; X86-NEXT: .weak __llvm_retpoline_ecx +; X86: __llvm_retpoline_ecx: +; X86-NEXT: # {{.*}} # %entry +; X86-NEXT: calll [[CALL_TARGET:.*]] +; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken +; X86-NEXT: # %entry +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: pause +; X86-NEXT: lfence +; X86-NEXT: jmp [[CAPTURE_SPEC]] +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: [[CALL_TARGET]]: # Block address taken +; X86-NEXT: # %entry +; X86-NEXT: movl %ecx, (%esp) +; X86-NEXT: retl +; +; X86-LABEL: .section .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat +; X86-NEXT: .hidden __llvm_retpoline_edx +; X86-NEXT: .weak __llvm_retpoline_edx +; X86: __llvm_retpoline_edx: +; X86-NEXT: # {{.*}} # %entry +; X86-NEXT: calll [[CALL_TARGET:.*]] +; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken +; X86-NEXT: # %entry +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: pause +; X86-NEXT: lfence +; X86-NEXT: jmp [[CAPTURE_SPEC]] +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: [[CALL_TARGET]]: # Block address taken +; X86-NEXT: # %entry +; X86-NEXT: movl %edx, (%esp) +; X86-NEXT: retl +; +; X86-LABEL: .section .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat +; X86-NEXT: .hidden __llvm_retpoline_push +; X86-NEXT: .weak __llvm_retpoline_push +; X86: __llvm_retpoline_push: +; X86-NEXT: # {{.*}} # %entry +; X86-NEXT: calll [[CALL_TARGET:.*]] +; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken +; X86-NEXT: # %entry +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: pause +; X86-NEXT: lfence +; X86-NEXT: jmp [[CAPTURE_SPEC]] +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: [[CALL_TARGET]]: # Block address taken +; X86-NEXT: # %entry +; X86-NEXT: addl $4, %esp +; X86-NEXT: pushl 4(%esp) +; X86-NEXT: pushl 4(%esp) +; X86-NEXT: popl 8(%esp) +; X86-NEXT: popl (%esp) +; X86-NEXT: retl + + +attributes #0 = { "target-features"="+retpoline" } +attributes #1 = { nonlazybind } |
