Vendor import of llvm release_60 branch r324090: - src

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2018-02-02 17:07:53 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2018-02-02 17:07:53 +0000
commit	6d18171c1901a4db5d3e757a5ba4737fe8789dec (patch)
tree	6adfbc90504e1005368a826374523b46773e1599 /test/CodeGen
parent	4a6a1ccbecd7e34f40b05b4ba0a05d0031dd1eff (diff)

vendor/llvm/llvm-release_60-r324090

Notes

Diffstat (limited to 'test/CodeGen')

-rw-r--r--

test/CodeGen/AMDGPU/smrd.ll

-rw-r--r--

test/CodeGen/Mips/pr36061.ll

-rw-r--r--

test/CodeGen/Mips/unsized-global.ll

-rw-r--r--

test/CodeGen/SPARC/stack-align.ll

-rw-r--r--

test/CodeGen/Thumb/pr35836.ll

-rw-r--r--

test/CodeGen/Thumb/pr35836_2.ll

-rw-r--r--

test/CodeGen/X86/O0-pipeline.ll

-rw-r--r--

test/CodeGen/X86/retpoline-external.ll

166

-rw-r--r--

test/CodeGen/X86/retpoline.ll

367

9 files changed, 749 insertions, 20 deletions

diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll
index 9fd20fd67b8c..420c7b80b8d3 100644
--- a/test/CodeGen/AMDGPU/smrd.ll
+++ b/test/CodeGen/AMDGPU/smrd.ll

@@ -194,11 +194,7 @@ main_body:

; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:

; GCN-NEXT: %bb.

-; SICIVI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;

-; GFX9-NEXT: v_add_u32_e32 [[ADD:v[0-9]+]], 0xfff, v0

-; GFX9-NEXT: buffer_load_dword v{{[0-9]}}, [[ADD]], s[0:3], 0 offen ;

+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;

define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {

main_body:

%off = add i32 %offset, 4095

@@ -244,16 +240,8 @@ main_body:

; GCN-LABEL: {{^}}smrd_vgpr_merged:

; GCN-NEXT: %bb.

-; SICIVI-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4

-; SICIVI-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28

-; GFX9: buffer_load_dword

+; GCN-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4

+; GCN-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28

define amdgpu_ps void @smrd_vgpr_merged(<4 x i32> inreg %desc, i32 %a) #0 {

main_body:

%a1 = add i32 %a, 4

diff --git a/test/CodeGen/Mips/pr36061.ll b/test/CodeGen/Mips/pr36061.ll
new file mode 100644
index 000000000000..6a9aa72aae0e
--- /dev/null
+++ b/test/CodeGen/Mips/pr36061.ll

@@ -0,0 +1,65 @@

+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py

+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -target-abi n64 | FileCheck %s --check-prefix=MIPSN64

+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -target-abi n32 | FileCheck %s --check-prefix=MIPSN32

+; Test that powi has its integer argument sign extended on mips64.

+declare double @llvm.powi.f64(double, i32)

+define double @powi(double %value, i32 %power) {

+; MIPSN64-LABEL: powi:

+; MIPSN64: # %bb.0:

+; MIPSN64-NEXT: daddiu $sp, $sp, -16

+; MIPSN64-NEXT: .cfi_def_cfa_offset 16

+; MIPSN64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill

+; MIPSN64-NEXT: .cfi_offset 31, -8

+; MIPSN64-NEXT: jal __powidf2

+; MIPSN64-NEXT: sll $5, $5, 0

+; MIPSN64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload

+; MIPSN64-NEXT: jr $ra

+; MIPSN64-NEXT: daddiu $sp, $sp, 16

+; MIPSN32-LABEL: powi:

+; MIPSN32: # %bb.0:

+; MIPSN32-NEXT: addiu $sp, $sp, -16

+; MIPSN32-NEXT: .cfi_def_cfa_offset 16

+; MIPSN32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill

+; MIPSN32-NEXT: .cfi_offset 31, -8

+; MIPSN32-NEXT: jal __powidf2

+; MIPSN32-NEXT: sll $5, $5, 0

+; MIPSN32-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload

+; MIPSN32-NEXT: jr $ra

+; MIPSN32-NEXT: addiu $sp, $sp, 16

+ %1 = tail call double @llvm.powi.f64(double %value, i32 %power)

+ ret double %1

+declare float @llvm.powi.f32(float, i32)

+define float @powfi(float %value, i32 %power) {

+; MIPSN64-LABEL: powfi:

+; MIPSN64: # %bb.0:

+; MIPSN64-NEXT: daddiu $sp, $sp, -16

+; MIPSN64-NEXT: .cfi_def_cfa_offset 16

+; MIPSN64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill

+; MIPSN64-NEXT: .cfi_offset 31, -8

+; MIPSN64-NEXT: jal __powisf2

+; MIPSN64-NEXT: sll $5, $5, 0

+; MIPSN64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload

+; MIPSN64-NEXT: jr $ra

+; MIPSN64-NEXT: daddiu $sp, $sp, 16

+; MIPSN32-LABEL: powfi:

+; MIPSN32: # %bb.0:

+; MIPSN32-NEXT: addiu $sp, $sp, -16

+; MIPSN32-NEXT: .cfi_def_cfa_offset 16

+; MIPSN32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill

+; MIPSN32-NEXT: .cfi_offset 31, -8

+; MIPSN32-NEXT: jal __powisf2

+; MIPSN32-NEXT: sll $5, $5, 0

+; MIPSN32-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload

+; MIPSN32-NEXT: jr $ra

+; MIPSN32-NEXT: addiu $sp, $sp, 16

+ %1 = tail call float @llvm.powi.f32(float %value, i32 %power)

+ ret float %1

diff --git a/test/CodeGen/Mips/unsized-global.ll b/test/CodeGen/Mips/unsized-global.ll
new file mode 100644
index 000000000000..a89ecc1fd1cb
--- /dev/null
+++ b/test/CodeGen/Mips/unsized-global.ll

@@ -0,0 +1,22 @@

+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py

+; Check that -mgpopt doesn't crash on unsized externals

+; RUN: llc -mtriple=mips64-unknown-freebsd -mattr=+noabicalls -target-abi n64 -mgpopt -o - %s | FileCheck %s

+%struct.a = type opaque

+@b = external global %struct.a, align 1

+; Function Attrs: norecurse nounwind readnone

+define %struct.a* @d() {

+; CHECK-LABEL: d:

+; CHECK: # %bb.0: # %entry

+; CHECK-NEXT: lui $1, %highest(b)

+; CHECK-NEXT: daddiu $1, $1, %higher(b)

+; CHECK-NEXT: dsll $1, $1, 16

+; CHECK-NEXT: daddiu $1, $1, %hi(b)

+; CHECK-NEXT: dsll $1, $1, 16

+; CHECK-NEXT: jr $ra

+; CHECK-NEXT: daddiu $2, $1, %lo(b)

+entry:

+ ret %struct.a* @b

diff --git a/test/CodeGen/SPARC/stack-align.ll b/test/CodeGen/SPARC/stack-align.ll
index b152e6a038f5..6516fb78e48b 100644
--- a/test/CodeGen/SPARC/stack-align.ll
+++ b/test/CodeGen/SPARC/stack-align.ll

@@ -1,4 +1,5 @@

-; RUN: llc -march=sparc < %s | FileCheck %s

+; RUN: llc -march=sparc < %s | FileCheck %s --check-prefixes=CHECK,CHECK32

+; RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK64

declare void @stack_realign_helper(i32 %a, i32* %b)

;; This is a function where we have a local variable of 64-byte

@@ -7,10 +8,15 @@ declare void @stack_realign_helper(i32 %a, i32* %b)

;; the argument is accessed via frame pointer not stack pointer (to %o0).

;; CHECK-LABEL: stack_realign:

-;; CHECK: andn %sp, 63, %sp

-;; CHECK-NEXT: ld [%fp+92], %o0

-;; CHECK-NEXT: call stack_realign_helper

-;; CHECK-NEXT: add %sp, 128, %o1

+;; CHECK32: andn %sp, 63, %sp

+;; CHECK32-NEXT: ld [%fp+92], %o0

+;; CHECK64: add %sp, 2047, %g1

+;; CHECK64-NEXT: andn %g1, 63, %g1

+;; CHECK64-NEXT: add %g1, -2047, %sp

+;; CHECK64-NEXT: ld [%fp+2227], %o0

+;; CHECK-NEXT: call stack_realign_helper

+;; CHECK32-NEXT: add %sp, 128, %o1

+;; CHECK64-NEXT: add %sp, 2239, %o1

define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) {

entry:

diff --git a/test/CodeGen/Thumb/pr35836.ll b/test/CodeGen/Thumb/pr35836.ll
new file mode 100644
index 000000000000..7765e66658a0
--- /dev/null
+++ b/test/CodeGen/Thumb/pr35836.ll

@@ -0,0 +1,56 @@

+; RUN: llc < %s | FileCheck %s

+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"

+target triple = "thumbv5e-none-linux-gnueabi"

+; Function Attrs: norecurse nounwind optsize

+define void @f(i32,i32,i32,i32,i32* %x4p, i32* %x5p, i32* %x6p) {

+if.end:

+ br label %while.body

+while.body:

+ %ll.0100 = phi i64 [ 0, %if.end ], [ %shr32, %while.body ]

+ %add = add nuw nsw i64 %ll.0100, 0

+ %add3 = add nuw nsw i64 %add, 0

+ %shr = lshr i64 %add3, 32

+ %conv7 = zext i32 %0 to i64

+ %conv9 = zext i32 %1 to i64

+ %add10 = add nuw nsw i64 %conv9, %conv7

+ %add11 = add nuw nsw i64 %add10, %shr

+ %shr14 = lshr i64 %add11, 32

+ %conv16 = zext i32 %2 to i64

+ %conv18 = zext i32 %3 to i64

+ %add19 = add nuw nsw i64 %conv18, %conv16

+ %add20 = add nuw nsw i64 %add19, %shr14

+ %conv21 = trunc i64 %add20 to i32

+ store i32 %conv21, i32* %x6p, align 4

+ %shr23 = lshr i64 %add20, 32

+ %x4 = load i32, i32* %x4p, align 4

+ %conv25 = zext i32 %x4 to i64

+ %x5 = load i32, i32* %x5p, align 4

+ %conv27 = zext i32 %x5 to i64

+ %add28 = add nuw nsw i64 %conv27, %conv25

+ %add29 = add nuw nsw i64 %add28, %shr23

+ %shr32 = lshr i64 %add29, 32

+ br label %while.body

+; CHECK: adds r3, r0, r1

+; CHECK: push {r5}

+; CHECK: pop {r1}

+; CHECK: adcs r1, r1

+; CHECK: ldr r0, [sp, #12] @ 4-byte Reload

+; CHECK: ldr r2, [sp, #8] @ 4-byte Reload

+; CHECK: adds r2, r0, r2

+; CHECK: push {r5}

+; CHECK: pop {r4}

+; CHECK: adcs r4, r4

+; CHECK: adds r0, r2, r5

+; CHECK: push {r3}

+; CHECK: pop {r0}

+; CHECK: adcs r0, r4

+; CHECK: ldr r6, [sp, #4] @ 4-byte Reload

+; CHECK: str r0, [r6]

+; CHECK: ldr r0, [r7]

+; CHECK: ldr r6, [sp] @ 4-byte Reload

+; CHECK: ldr r6, [r6]

+; CHECK: adds r0, r6, r0

diff --git a/test/CodeGen/Thumb/pr35836_2.ll b/test/CodeGen/Thumb/pr35836_2.ll
new file mode 100644
index 000000000000..af115e8ce21a
--- /dev/null
+++ b/test/CodeGen/Thumb/pr35836_2.ll

@@ -0,0 +1,57 @@

+; RUN: llc < %s | FileCheck %s

+target datalayout = "e-m:e-p:64:64-i128:64-v128:64:128-a:0:64-n64-S64"

+target triple = "thumbv6---gnueabi"

+; Function Attrs: norecurse nounwind readonly

+define i128 @a(i64* nocapture readonly %z) local_unnamed_addr #0 {

+entry:

+ %0 = load i64, i64* %z, align 4

+ %conv.i = zext i64 %0 to i128

+ %arrayidx1 = getelementptr inbounds i64, i64* %z, i64 2

+ %1 = load i64, i64* %arrayidx1, align 4

+ %conv.i38 = zext i64 %1 to i128

+ %shl.i39 = shl nuw i128 %conv.i38, 64

+ %or = or i128 %shl.i39, %conv.i

+ %arrayidx3 = getelementptr inbounds i64, i64* %z, i64 1

+ %2 = load i64, i64* %arrayidx3, align 4

+ %conv.i37 = zext i64 %2 to i128

+ %arrayidx5 = getelementptr inbounds i64, i64* %z, i64 3

+ %3 = load i64, i64* %arrayidx5, align 4

+ %conv.i35 = zext i64 %3 to i128

+ %shl.i36 = shl nuw i128 %conv.i35, 64

+ %or7 = or i128 %shl.i36, %conv.i37

+ %arrayidx10 = getelementptr inbounds i64, i64* %z, i64 4

+ %4 = load i64, i64* %arrayidx10, align 4

+ %conv.i64 = zext i64 %4 to i128

+ %shl.i33 = shl nuw i128 %conv.i64, 64

+ %or12 = or i128 %shl.i33, %conv.i

+ %arrayidx15 = getelementptr inbounds i64, i64* %z, i64 5

+ %5 = load i64, i64* %arrayidx15, align 4

+ %conv.i30 = zext i64 %5 to i128

+ %shl.i = shl nuw i128 %conv.i30, 64

+ %or17 = or i128 %shl.i, %conv.i37

+ %add = add i128 %or7, %or

+ %add18 = add i128 %or17, %or12

+ %mul = mul i128 %add18, %add

+ ret i128 %mul

+; CHECK: adds r4, r2, r7

+; CHECK: mov r4, r1

+; CHECK: adcs r4, r6

+; CHECK: ldr r4, [sp, #20] @ 4-byte Reload

+; CHECK: adcs r5, r4

+; CHECK: ldr r4, [sp, #24] @ 4-byte Reload

+; CHECK: adcs r3, r4

+; CHECK: adds r4, r2, r7

+; CHECK: adcs r1, r6

+; CHECK: mov r2, sp

+; CHECK: str r4, [r2]

+; CHECK: str r1, [r2, #4]

+; CHECK: ldr r6, [r0, #16]

+; CHECK: ldr r7, [r0, #24]

+; CHECK: adcs r7, r6

+; CHECK: str r7, [r2, #8]

+; CHECK: ldr r6, [r0, #20]

+; CHECK: ldr r0, [r0, #28]

+; CHECK: adcs r0, r6

diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll
index cb7dabefe45a..3a720a5288a2 100644
--- a/test/CodeGen/X86/O0-pipeline.ll
+++ b/test/CodeGen/X86/O0-pipeline.ll

@@ -25,6 +25,7 @@

; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)

; CHECK-NEXT: Scalarize Masked Memory Intrinsics

; CHECK-NEXT: Expand reduction intrinsics

+; CHECK-NEXT: Expand indirectbr instructions

; CHECK-NEXT: Rewrite Symbols

; CHECK-NEXT: FunctionPass Manager

; CHECK-NEXT: Dominator Tree Construction

@@ -57,6 +58,7 @@

; CHECK-NEXT: Machine Natural Loop Construction

; CHECK-NEXT: Insert XRay ops

; CHECK-NEXT: Implement the 'patchable-function' attribute

+; CHECK-NEXT: X86 Retpoline Thunks

; CHECK-NEXT: Lazy Machine Block Frequency Analysis

; CHECK-NEXT: Machine Optimization Remark Emitter

; CHECK-NEXT: MachineDominator Tree Construction

diff --git a/test/CodeGen/X86/retpoline-external.ll b/test/CodeGen/X86/retpoline-external.ll
new file mode 100644
index 000000000000..66d32ba5d73d
--- /dev/null
+++ b/test/CodeGen/X86/retpoline-external.ll

@@ -0,0 +1,166 @@

+; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64

+; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST

+; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86

+; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST

+declare void @bar(i32)

+; Test a simple indirect call and tail call.

+define void @icall_reg(void (i32)* %fp, i32 %x) #0 {

+entry:

+ tail call void @bar(i32 %x)

+ tail call void %fp(i32 %x)

+ tail call void @bar(i32 %x)

+ tail call void %fp(i32 %x)

+ ret void

+; X64-LABEL: icall_reg:

+; X64-DAG: movq %rdi, %[[fp:[^ ]*]]

+; X64-DAG: movl %esi, %[[x:[^ ]*]]

+; X64: movl %[[x]], %edi

+; X64: callq bar

+; X64-DAG: movl %[[x]], %edi

+; X64-DAG: movq %[[fp]], %r11

+; X64: callq __llvm_external_retpoline_r11

+; X64: movl %[[x]], %edi

+; X64: callq bar

+; X64-DAG: movl %[[x]], %edi

+; X64-DAG: movq %[[fp]], %r11

+; X64: jmp __llvm_external_retpoline_r11 # TAILCALL

+; X64FAST-LABEL: icall_reg:

+; X64FAST: callq bar

+; X64FAST: callq __llvm_external_retpoline_r11

+; X64FAST: callq bar

+; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL

+; X86-LABEL: icall_reg:

+; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]

+; X86-DAG: movl 16(%esp), %[[x:[^ ]*]]

+; X86: pushl %[[x]]

+; X86: calll bar

+; X86: movl %[[fp]], %eax

+; X86: pushl %[[x]]

+; X86: calll __llvm_external_retpoline_eax

+; X86: pushl %[[x]]

+; X86: calll bar

+; X86: movl %[[fp]], %eax

+; X86: pushl %[[x]]

+; X86: calll __llvm_external_retpoline_eax

+; X86-NOT: # TAILCALL

+; X86FAST-LABEL: icall_reg:

+; X86FAST: calll bar

+; X86FAST: calll __llvm_external_retpoline_eax

+; X86FAST: calll bar

+; X86FAST: calll __llvm_external_retpoline_eax

+@global_fp = external global void (i32)*

+; Test an indirect call through a global variable.

+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {

+ %fp1 = load void (i32)*, void (i32)** @global_fp

+ call void %fp1(i32 %x)

+ %fp2 = load void (i32)*, void (i32)** @global_fp

+ tail call void %fp2(i32 %x)

+ ret void

+; X64-LABEL: icall_global_fp:

+; X64-DAG: movl %edi, %[[x:[^ ]*]]

+; X64-DAG: movq global_fp(%rip), %r11

+; X64: callq __llvm_external_retpoline_r11

+; X64-DAG: movl %[[x]], %edi

+; X64-DAG: movq global_fp(%rip), %r11

+; X64: jmp __llvm_external_retpoline_r11 # TAILCALL

+; X64FAST-LABEL: icall_global_fp:

+; X64FAST: movq global_fp(%rip), %r11

+; X64FAST: callq __llvm_external_retpoline_r11

+; X64FAST: movq global_fp(%rip), %r11

+; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL

+; X86-LABEL: icall_global_fp:

+; X86: movl global_fp, %eax

+; X86: pushl 4(%esp)

+; X86: calll __llvm_external_retpoline_eax

+; X86: addl $4, %esp

+; X86: movl global_fp, %eax

+; X86: jmp __llvm_external_retpoline_eax # TAILCALL

+; X86FAST-LABEL: icall_global_fp:

+; X86FAST: calll __llvm_external_retpoline_eax

+; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL

+%struct.Foo = type { void (%struct.Foo*)** }

+; Test an indirect call through a vtable.

+define void @vcall(%struct.Foo* %obj) #0 {

+ %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0

+ %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field

+ %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1

+ %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot

+ tail call void %fp(%struct.Foo* %obj)

+ ret void

+; X64-LABEL: vcall:

+; X64: movq %rdi, %[[obj:[^ ]*]]

+; X64: movq (%[[obj]]), %[[vptr:[^ ]*]]

+; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]

+; X64: movq %[[fp]], %r11

+; X64: callq __llvm_external_retpoline_r11

+; X64-DAG: movq %[[obj]], %rdi

+; X64-DAG: movq %[[fp]], %r11

+; X64: jmp __llvm_external_retpoline_r11 # TAILCALL

+; X64FAST-LABEL: vcall:

+; X64FAST: callq __llvm_external_retpoline_r11

+; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL

+; X86-LABEL: vcall:

+; X86: movl 8(%esp), %[[obj:[^ ]*]]

+; X86: movl (%[[obj]]), %[[vptr:[^ ]*]]

+; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]

+; X86: movl %[[fp]], %eax

+; X86: pushl %[[obj]]

+; X86: calll __llvm_external_retpoline_eax

+; X86: addl $4, %esp

+; X86: movl %[[fp]], %eax

+; X86: jmp __llvm_external_retpoline_eax # TAILCALL

+; X86FAST-LABEL: vcall:

+; X86FAST: calll __llvm_external_retpoline_eax

+; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL

+declare void @direct_callee()

+define void @direct_tail() #0 {

+ tail call void @direct_callee()

+ ret void

+; X64-LABEL: direct_tail:

+; X64: jmp direct_callee # TAILCALL

+; X64FAST-LABEL: direct_tail:

+; X64FAST: jmp direct_callee # TAILCALL

+; X86-LABEL: direct_tail:

+; X86: jmp direct_callee # TAILCALL

+; X86FAST-LABEL: direct_tail:

+; X86FAST: jmp direct_callee # TAILCALL

+; Lastly check that no thunks were emitted.

+; X64-NOT: __{{.*}}_retpoline_{{.*}}:

+; X64FAST-NOT: __{{.*}}_retpoline_{{.*}}:

+; X86-NOT: __{{.*}}_retpoline_{{.*}}:

+; X86FAST-NOT: __{{.*}}_retpoline_{{.*}}:

+attributes #0 = { "target-features"="+retpoline-external-thunk" }

diff --git a/test/CodeGen/X86/retpoline.ll b/test/CodeGen/X86/retpoline.ll
new file mode 100644
index 000000000000..57d3388b812a
--- /dev/null
+++ b/test/CodeGen/X86/retpoline.ll

@@ -0,0 +1,367 @@

+; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64

+; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST

+; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86

+; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST

+declare void @bar(i32)

+; Test a simple indirect call and tail call.

+define void @icall_reg(void (i32)* %fp, i32 %x) #0 {

+entry:

+ tail call void @bar(i32 %x)

+ tail call void %fp(i32 %x)

+ tail call void @bar(i32 %x)

+ tail call void %fp(i32 %x)

+ ret void

+; X64-LABEL: icall_reg:

+; X64-DAG: movq %rdi, %[[fp:[^ ]*]]

+; X64-DAG: movl %esi, %[[x:[^ ]*]]

+; X64: movl %[[x]], %edi

+; X64: callq bar

+; X64-DAG: movl %[[x]], %edi

+; X64-DAG: movq %[[fp]], %r11

+; X64: callq __llvm_retpoline_r11

+; X64: movl %[[x]], %edi

+; X64: callq bar

+; X64-DAG: movl %[[x]], %edi

+; X64-DAG: movq %[[fp]], %r11

+; X64: jmp __llvm_retpoline_r11 # TAILCALL

+; X64FAST-LABEL: icall_reg:

+; X64FAST: callq bar

+; X64FAST: callq __llvm_retpoline_r11

+; X64FAST: callq bar

+; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

+; X86-LABEL: icall_reg:

+; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]

+; X86-DAG: movl 16(%esp), %[[x:[^ ]*]]

+; X86: pushl %[[x]]

+; X86: calll bar

+; X86: movl %[[fp]], %eax

+; X86: pushl %[[x]]

+; X86: calll __llvm_retpoline_eax

+; X86: pushl %[[x]]

+; X86: calll bar

+; X86: movl %[[fp]], %eax

+; X86: pushl %[[x]]

+; X86: calll __llvm_retpoline_eax

+; X86-NOT: # TAILCALL

+; X86FAST-LABEL: icall_reg:

+; X86FAST: calll bar

+; X86FAST: calll __llvm_retpoline_eax

+; X86FAST: calll bar

+; X86FAST: calll __llvm_retpoline_eax

+@global_fp = external global void (i32)*

+; Test an indirect call through a global variable.

+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {

+ %fp1 = load void (i32)*, void (i32)** @global_fp

+ call void %fp1(i32 %x)

+ %fp2 = load void (i32)*, void (i32)** @global_fp

+ tail call void %fp2(i32 %x)

+ ret void

+; X64-LABEL: icall_global_fp:

+; X64-DAG: movl %edi, %[[x:[^ ]*]]

+; X64-DAG: movq global_fp(%rip), %r11

+; X64: callq __llvm_retpoline_r11

+; X64-DAG: movl %[[x]], %edi

+; X64-DAG: movq global_fp(%rip), %r11

+; X64: jmp __llvm_retpoline_r11 # TAILCALL

+; X64FAST-LABEL: icall_global_fp:

+; X64FAST: movq global_fp(%rip), %r11

+; X64FAST: callq __llvm_retpoline_r11

+; X64FAST: movq global_fp(%rip), %r11

+; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

+; X86-LABEL: icall_global_fp:

+; X86: movl global_fp, %eax

+; X86: pushl 4(%esp)

+; X86: calll __llvm_retpoline_eax

+; X86: addl $4, %esp

+; X86: movl global_fp, %eax

+; X86: jmp __llvm_retpoline_eax # TAILCALL

+; X86FAST-LABEL: icall_global_fp:

+; X86FAST: calll __llvm_retpoline_eax

+; X86FAST: jmp __llvm_retpoline_eax # TAILCALL

+%struct.Foo = type { void (%struct.Foo*)** }

+; Test an indirect call through a vtable.

+define void @vcall(%struct.Foo* %obj) #0 {

+ %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0

+ %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field

+ %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1

+ %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot

+ tail call void %fp(%struct.Foo* %obj)

+ ret void

+; X64-LABEL: vcall:

+; X64: movq %rdi, %[[obj:[^ ]*]]

+; X64: movq (%[[obj]]), %[[vptr:[^ ]*]]

+; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]

+; X64: movq %[[fp]], %r11

+; X64: callq __llvm_retpoline_r11

+; X64-DAG: movq %[[obj]], %rdi

+; X64-DAG: movq %[[fp]], %r11

+; X64: jmp __llvm_retpoline_r11 # TAILCALL

+; X64FAST-LABEL: vcall:

+; X64FAST: callq __llvm_retpoline_r11

+; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

+; X86-LABEL: vcall:

+; X86: movl 8(%esp), %[[obj:[^ ]*]]

+; X86: movl (%[[obj]]), %[[vptr:[^ ]*]]

+; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]

+; X86: movl %[[fp]], %eax

+; X86: pushl %[[obj]]

+; X86: calll __llvm_retpoline_eax

+; X86: addl $4, %esp

+; X86: movl %[[fp]], %eax

+; X86: jmp __llvm_retpoline_eax # TAILCALL

+; X86FAST-LABEL: vcall:

+; X86FAST: calll __llvm_retpoline_eax

+; X86FAST: jmp __llvm_retpoline_eax # TAILCALL

+declare void @direct_callee()

+define void @direct_tail() #0 {

+ tail call void @direct_callee()

+ ret void

+; X64-LABEL: direct_tail:

+; X64: jmp direct_callee # TAILCALL

+; X64FAST-LABEL: direct_tail:

+; X64FAST: jmp direct_callee # TAILCALL

+; X86-LABEL: direct_tail:

+; X86: jmp direct_callee # TAILCALL

+; X86FAST-LABEL: direct_tail:

+; X86FAST: jmp direct_callee # TAILCALL

+declare void @nonlazybind_callee() #1

+define void @nonlazybind_caller() #0 {

+ call void @nonlazybind_callee()

+ tail call void @nonlazybind_callee()

+ ret void

+; X64-LABEL: nonlazybind_caller:

+; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]

+; X64: movq %[[REG]], %r11

+; X64: callq __llvm_retpoline_r11

+; X64: movq %[[REG]], %r11

+; X64: jmp __llvm_retpoline_r11 # TAILCALL

+; X64FAST-LABEL: nonlazybind_caller:

+; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11

+; X64FAST: callq __llvm_retpoline_r11

+; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11

+; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

+; X86-LABEL: nonlazybind_caller:

+; X86: calll nonlazybind_callee@PLT

+; X86: jmp nonlazybind_callee@PLT # TAILCALL

+; X86FAST-LABEL: nonlazybind_caller:

+; X86FAST: calll nonlazybind_callee@PLT

+; X86FAST: jmp nonlazybind_callee@PLT # TAILCALL

+@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),

+ i8* blockaddress(@indirectbr_rewrite, %bb1),

+ i8* blockaddress(@indirectbr_rewrite, %bb2),

+ i8* blockaddress(@indirectbr_rewrite, %bb3),

+ i8* blockaddress(@indirectbr_rewrite, %bb4),

+ i8* blockaddress(@indirectbr_rewrite, %bb5),

+ i8* blockaddress(@indirectbr_rewrite, %bb6),

+ i8* blockaddress(@indirectbr_rewrite, %bb7),

+ i8* blockaddress(@indirectbr_rewrite, %bb8),

+ i8* blockaddress(@indirectbr_rewrite, %bb9)]

+; Check that when retpolines are enabled a function with indirectbr gets

+; rewritten to use switch, and that in turn doesn't get lowered as a jump

+; table.

+define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 {

+; X64-LABEL: indirectbr_rewrite:

+; X64-NOT: jmpq

+; X86-LABEL: indirectbr_rewrite:

+; X86-NOT: jmpl

+entry:

+ %i0 = load i64, i64* %p

+ %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0

+ %target0 = load i8*, i8** %target.i0

+ indirectbr i8* %target0, [label %bb1, label %bb3]

+bb0:

+ store volatile i64 0, i64* %sink

+ br label %latch

+bb1:

+ store volatile i64 1, i64* %sink

+ br label %latch

+bb2:

+ store volatile i64 2, i64* %sink

+ br label %latch

+bb3:

+ store volatile i64 3, i64* %sink

+ br label %latch

+bb4:

+ store volatile i64 4, i64* %sink

+ br label %latch

+bb5:

+ store volatile i64 5, i64* %sink

+ br label %latch

+bb6:

+ store volatile i64 6, i64* %sink

+ br label %latch

+bb7:

+ store volatile i64 7, i64* %sink

+ br label %latch

+bb8:

+ store volatile i64 8, i64* %sink

+ br label %latch

+bb9:

+ store volatile i64 9, i64* %sink

+ br label %latch

+latch:

+ %i.next = load i64, i64* %p

+ %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next

+ %target.next = load i8*, i8** %target.i.next

+ ; Potentially hit a full 10 successors here so that even if we rewrite as

+ ; a switch it will try to be lowered with a jump table.

+ indirectbr i8* %target.next, [label %bb0,

+ label %bb1,

+ label %bb2,

+ label %bb3,

+ label %bb4,

+ label %bb5,

+ label %bb6,

+ label %bb7,

+ label %bb8,

+ label %bb9]

+; Lastly check that the necessary thunks were emitted.

+; X64-LABEL: .section .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat

+; X64-NEXT: .hidden __llvm_retpoline_r11

+; X64-NEXT: .weak __llvm_retpoline_r11

+; X64: __llvm_retpoline_r11:

+; X64-NEXT: # {{.*}} # %entry

+; X64-NEXT: callq [[CALL_TARGET:.*]]

+; X64-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken

+; X64-NEXT: # %entry

+; X64-NEXT: # =>This Inner Loop Header: Depth=1

+; X64-NEXT: pause

+; X64-NEXT: lfence

+; X64-NEXT: jmp [[CAPTURE_SPEC]]

+; X64-NEXT: .p2align 4, 0x90

+; X64-NEXT: [[CALL_TARGET]]: # Block address taken

+; X64-NEXT: # %entry

+; X64-NEXT: movq %r11, (%rsp)

+; X64-NEXT: retq

+; X86-LABEL: .section .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat

+; X86-NEXT: .hidden __llvm_retpoline_eax

+; X86-NEXT: .weak __llvm_retpoline_eax

+; X86: __llvm_retpoline_eax:

+; X86-NEXT: # {{.*}} # %entry

+; X86-NEXT: calll [[CALL_TARGET:.*]]

+; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken

+; X86-NEXT: # %entry

+; X86-NEXT: # =>This Inner Loop Header: Depth=1

+; X86-NEXT: pause

+; X86-NEXT: lfence

+; X86-NEXT: jmp [[CAPTURE_SPEC]]

+; X86-NEXT: .p2align 4, 0x90

+; X86-NEXT: [[CALL_TARGET]]: # Block address taken

+; X86-NEXT: # %entry

+; X86-NEXT: movl %eax, (%esp)

+; X86-NEXT: retl

+; X86-LABEL: .section .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat

+; X86-NEXT: .hidden __llvm_retpoline_ecx

+; X86-NEXT: .weak __llvm_retpoline_ecx

+; X86: __llvm_retpoline_ecx:

+; X86-NEXT: # {{.*}} # %entry

+; X86-NEXT: calll [[CALL_TARGET:.*]]

+; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken

+; X86-NEXT: # %entry

+; X86-NEXT: # =>This Inner Loop Header: Depth=1

+; X86-NEXT: pause

+; X86-NEXT: lfence

+; X86-NEXT: jmp [[CAPTURE_SPEC]]

+; X86-NEXT: .p2align 4, 0x90

+; X86-NEXT: [[CALL_TARGET]]: # Block address taken

+; X86-NEXT: # %entry

+; X86-NEXT: movl %ecx, (%esp)

+; X86-NEXT: retl

+; X86-LABEL: .section .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat

+; X86-NEXT: .hidden __llvm_retpoline_edx

+; X86-NEXT: .weak __llvm_retpoline_edx

+; X86: __llvm_retpoline_edx:

+; X86-NEXT: # {{.*}} # %entry

+; X86-NEXT: calll [[CALL_TARGET:.*]]

+; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken

+; X86-NEXT: # %entry

+; X86-NEXT: # =>This Inner Loop Header: Depth=1

+; X86-NEXT: pause

+; X86-NEXT: lfence

+; X86-NEXT: jmp [[CAPTURE_SPEC]]

+; X86-NEXT: .p2align 4, 0x90

+; X86-NEXT: [[CALL_TARGET]]: # Block address taken

+; X86-NEXT: # %entry

+; X86-NEXT: movl %edx, (%esp)

+; X86-NEXT: retl

+; X86-LABEL: .section .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat

+; X86-NEXT: .hidden __llvm_retpoline_push

+; X86-NEXT: .weak __llvm_retpoline_push

+; X86: __llvm_retpoline_push:

+; X86-NEXT: # {{.*}} # %entry

+; X86-NEXT: calll [[CALL_TARGET:.*]]

+; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken

+; X86-NEXT: # %entry

+; X86-NEXT: # =>This Inner Loop Header: Depth=1

+; X86-NEXT: pause

+; X86-NEXT: lfence

+; X86-NEXT: jmp [[CAPTURE_SPEC]]

+; X86-NEXT: .p2align 4, 0x90

+; X86-NEXT: [[CALL_TARGET]]: # Block address taken

+; X86-NEXT: # %entry

+; X86-NEXT: addl $4, %esp

+; X86-NEXT: pushl 4(%esp)

+; X86-NEXT: popl 8(%esp)

+; X86-NEXT: popl (%esp)

+; X86-NEXT: retl

+attributes #0 = { "target-features"="+retpoline" }

+attributes #1 = { nonlazybind }