author    Dimitry Andric <dim@FreeBSD.org>    2018-02-02 17:07:53 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2018-02-02 17:07:53 +0000
commit    6d18171c1901a4db5d3e757a5ba4737fe8789dec (patch)
tree      6adfbc90504e1005368a826374523b46773e1599 /test/CodeGen
parent    4a6a1ccbecd7e34f40b05b4ba0a05d0031dd1eff (diff)
Diffstat (limited to 'test/CodeGen')
-rw-r--r--  test/CodeGen/AMDGPU/smrd.ll             |  18
-rw-r--r--  test/CodeGen/Mips/pr36061.ll            |  65
-rw-r--r--  test/CodeGen/Mips/unsized-global.ll     |  22
-rw-r--r--  test/CodeGen/SPARC/stack-align.ll       |  16
-rw-r--r--  test/CodeGen/Thumb/pr35836.ll           |  56
-rw-r--r--  test/CodeGen/Thumb/pr35836_2.ll         |  57
-rw-r--r--  test/CodeGen/X86/O0-pipeline.ll         |   2
-rw-r--r--  test/CodeGen/X86/retpoline-external.ll  | 166
-rw-r--r--  test/CodeGen/X86/retpoline.ll           | 367
9 files changed, 749 insertions(+), 20 deletions(-)
diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll
index 9fd20fd67b8c..420c7b80b8d3 100644
--- a/test/CodeGen/AMDGPU/smrd.ll
+++ b/test/CodeGen/AMDGPU/smrd.ll
@@ -194,11 +194,7 @@ main_body:
; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
; GCN-NEXT: %bb.
-
-; SICIVI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
-
-; GFX9-NEXT: v_add_u32_e32 [[ADD:v[0-9]+]], 0xfff, v0
-; GFX9-NEXT: buffer_load_dword v{{[0-9]}}, [[ADD]], s[0:3], 0 offen ;
+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
main_body:
%off = add i32 %offset, 4095
@@ -244,16 +240,8 @@ main_body:
; GCN-LABEL: {{^}}smrd_vgpr_merged:
; GCN-NEXT: %bb.
-
-; SICIVI-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
-; SICIVI-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
-
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
+; GCN-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
+; GCN-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
define amdgpu_ps void @smrd_vgpr_merged(<4 x i32> inreg %desc, i32 %a) #0 {
main_body:
%a1 = add i32 %a, 4
diff --git a/test/CodeGen/Mips/pr36061.ll b/test/CodeGen/Mips/pr36061.ll
new file mode 100644
index 000000000000..6a9aa72aae0e
--- /dev/null
+++ b/test/CodeGen/Mips/pr36061.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -target-abi n64 | FileCheck %s --check-prefix=MIPSN64
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -target-abi n32 | FileCheck %s --check-prefix=MIPSN32
+
+; Test that powi has its integer argument sign extended on mips64.
+
+declare double @llvm.powi.f64(double, i32)
+
+define double @powi(double %value, i32 %power) {
+; MIPSN64-LABEL: powi:
+; MIPSN64: # %bb.0:
+; MIPSN64-NEXT: daddiu $sp, $sp, -16
+; MIPSN64-NEXT: .cfi_def_cfa_offset 16
+; MIPSN64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPSN64-NEXT: .cfi_offset 31, -8
+; MIPSN64-NEXT: jal __powidf2
+; MIPSN64-NEXT: sll $5, $5, 0
+; MIPSN64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPSN64-NEXT: jr $ra
+; MIPSN64-NEXT: daddiu $sp, $sp, 16
+;
+; MIPSN32-LABEL: powi:
+; MIPSN32: # %bb.0:
+; MIPSN32-NEXT: addiu $sp, $sp, -16
+; MIPSN32-NEXT: .cfi_def_cfa_offset 16
+; MIPSN32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPSN32-NEXT: .cfi_offset 31, -8
+; MIPSN32-NEXT: jal __powidf2
+; MIPSN32-NEXT: sll $5, $5, 0
+; MIPSN32-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPSN32-NEXT: jr $ra
+; MIPSN32-NEXT: addiu $sp, $sp, 16
+ %1 = tail call double @llvm.powi.f64(double %value, i32 %power)
+ ret double %1
+}
+
+declare float @llvm.powi.f32(float, i32)
+
+define float @powfi(float %value, i32 %power) {
+; MIPSN64-LABEL: powfi:
+; MIPSN64: # %bb.0:
+; MIPSN64-NEXT: daddiu $sp, $sp, -16
+; MIPSN64-NEXT: .cfi_def_cfa_offset 16
+; MIPSN64-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPSN64-NEXT: .cfi_offset 31, -8
+; MIPSN64-NEXT: jal __powisf2
+; MIPSN64-NEXT: sll $5, $5, 0
+; MIPSN64-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPSN64-NEXT: jr $ra
+; MIPSN64-NEXT: daddiu $sp, $sp, 16
+;
+; MIPSN32-LABEL: powfi:
+; MIPSN32: # %bb.0:
+; MIPSN32-NEXT: addiu $sp, $sp, -16
+; MIPSN32-NEXT: .cfi_def_cfa_offset 16
+; MIPSN32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPSN32-NEXT: .cfi_offset 31, -8
+; MIPSN32-NEXT: jal __powisf2
+; MIPSN32-NEXT: sll $5, $5, 0
+; MIPSN32-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPSN32-NEXT: jr $ra
+; MIPSN32-NEXT: addiu $sp, $sp, 16
+ %1 = tail call float @llvm.powi.f32(float %value, i32 %power)
+ ret float %1
+}
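
For context, a minimal sketch of the convention the new MIPS test exercises (illustrative only, not part of the patch; the register choice and instruction scheduling here are assumptions): under the N64 and N32 ABIs a 32-bit integer argument is passed sign-extended in a 64-bit register, and a 32-bit shift-by-zero is the usual MIPS64 idiom for producing that extension before the __powidf2/__powisf2 libcalls.

    # Hypothetical caller fragment (MIPS assembly), not emitted by this commit.
    sll   $5, $5, 0        # sign-extend the i32 %power held in $a1 ($5); a 32-bit
                           # shift result is defined to be sign-extended on MIPS64
    jal   __powidf2        # compiler-rt libcall: double __powidf2(double, int)
    nop                    # delay slot (the code checked above folds the sll here)
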
diff --git a/test/CodeGen/Mips/unsized-global.ll b/test/CodeGen/Mips/unsized-global.ll
new file mode 100644
index 000000000000..a89ecc1fd1cb
--- /dev/null
+++ b/test/CodeGen/Mips/unsized-global.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Check that -mgpopt doesn't crash on unsized externals
+; RUN: llc -mtriple=mips64-unknown-freebsd -mattr=+noabicalls -target-abi n64 -mgpopt -o - %s | FileCheck %s
+
+%struct.a = type opaque
+
+@b = external global %struct.a, align 1
+
+; Function Attrs: norecurse nounwind readnone
+define %struct.a* @d() {
+; CHECK-LABEL: d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui $1, %highest(b)
+; CHECK-NEXT: daddiu $1, $1, %higher(b)
+; CHECK-NEXT: dsll $1, $1, 16
+; CHECK-NEXT: daddiu $1, $1, %hi(b)
+; CHECK-NEXT: dsll $1, $1, 16
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: daddiu $2, $1, %lo(b)
+entry:
+ ret %struct.a* @b
+}
diff --git a/test/CodeGen/SPARC/stack-align.ll b/test/CodeGen/SPARC/stack-align.ll
index b152e6a038f5..6516fb78e48b 100644
--- a/test/CodeGen/SPARC/stack-align.ll
+++ b/test/CodeGen/SPARC/stack-align.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=sparc < %s | FileCheck %s
+; RUN: llc -march=sparc < %s | FileCheck %s --check-prefixes=CHECK,CHECK32
+; RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK64
declare void @stack_realign_helper(i32 %a, i32* %b)
;; This is a function where we have a local variable of 64-byte
@@ -7,10 +8,15 @@ declare void @stack_realign_helper(i32 %a, i32* %b)
;; the argument is accessed via frame pointer not stack pointer (to %o0).
;; CHECK-LABEL: stack_realign:
-;; CHECK: andn %sp, 63, %sp
-;; CHECK-NEXT: ld [%fp+92], %o0
-;; CHECK-NEXT: call stack_realign_helper
-;; CHECK-NEXT: add %sp, 128, %o1
+;; CHECK32: andn %sp, 63, %sp
+;; CHECK32-NEXT: ld [%fp+92], %o0
+;; CHECK64: add %sp, 2047, %g1
+;; CHECK64-NEXT: andn %g1, 63, %g1
+;; CHECK64-NEXT: add %g1, -2047, %sp
+;; CHECK64-NEXT: ld [%fp+2227], %o0
+;; CHECK-NEXT: call stack_realign_helper
+;; CHECK32-NEXT: add %sp, 128, %o1
+;; CHECK64-NEXT: add %sp, 2239, %o1
define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) {
entry:
diff --git a/test/CodeGen/Thumb/pr35836.ll b/test/CodeGen/Thumb/pr35836.ll
new file mode 100644
index 000000000000..7765e66658a0
--- /dev/null
+++ b/test/CodeGen/Thumb/pr35836.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv5e-none-linux-gnueabi"
+
+; Function Attrs: norecurse nounwind optsize
+define void @f(i32,i32,i32,i32,i32* %x4p, i32* %x5p, i32* %x6p) {
+if.end:
+ br label %while.body
+
+while.body:
+ %ll.0100 = phi i64 [ 0, %if.end ], [ %shr32, %while.body ]
+ %add = add nuw nsw i64 %ll.0100, 0
+ %add3 = add nuw nsw i64 %add, 0
+ %shr = lshr i64 %add3, 32
+ %conv7 = zext i32 %0 to i64
+ %conv9 = zext i32 %1 to i64
+ %add10 = add nuw nsw i64 %conv9, %conv7
+ %add11 = add nuw nsw i64 %add10, %shr
+ %shr14 = lshr i64 %add11, 32
+ %conv16 = zext i32 %2 to i64
+ %conv18 = zext i32 %3 to i64
+ %add19 = add nuw nsw i64 %conv18, %conv16
+ %add20 = add nuw nsw i64 %add19, %shr14
+ %conv21 = trunc i64 %add20 to i32
+ store i32 %conv21, i32* %x6p, align 4
+ %shr23 = lshr i64 %add20, 32
+ %x4 = load i32, i32* %x4p, align 4
+ %conv25 = zext i32 %x4 to i64
+ %x5 = load i32, i32* %x5p, align 4
+ %conv27 = zext i32 %x5 to i64
+ %add28 = add nuw nsw i64 %conv27, %conv25
+ %add29 = add nuw nsw i64 %add28, %shr23
+ %shr32 = lshr i64 %add29, 32
+ br label %while.body
+}
+; CHECK: adds r3, r0, r1
+; CHECK: push {r5}
+; CHECK: pop {r1}
+; CHECK: adcs r1, r1
+; CHECK: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK: ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK: adds r2, r0, r2
+; CHECK: push {r5}
+; CHECK: pop {r4}
+; CHECK: adcs r4, r4
+; CHECK: adds r0, r2, r5
+; CHECK: push {r3}
+; CHECK: pop {r0}
+; CHECK: adcs r0, r4
+; CHECK: ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK: str r0, [r6]
+; CHECK: ldr r0, [r7]
+; CHECK: ldr r6, [sp] @ 4-byte Reload
+; CHECK: ldr r6, [r6]
+; CHECK: adds r0, r6, r0
diff --git a/test/CodeGen/Thumb/pr35836_2.ll b/test/CodeGen/Thumb/pr35836_2.ll
new file mode 100644
index 000000000000..af115e8ce21a
--- /dev/null
+++ b/test/CodeGen/Thumb/pr35836_2.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i128:64-v128:64:128-a:0:64-n64-S64"
+target triple = "thumbv6---gnueabi"
+
+; Function Attrs: norecurse nounwind readonly
+define i128 @a(i64* nocapture readonly %z) local_unnamed_addr #0 {
+entry:
+ %0 = load i64, i64* %z, align 4
+ %conv.i = zext i64 %0 to i128
+ %arrayidx1 = getelementptr inbounds i64, i64* %z, i64 2
+ %1 = load i64, i64* %arrayidx1, align 4
+ %conv.i38 = zext i64 %1 to i128
+ %shl.i39 = shl nuw i128 %conv.i38, 64
+ %or = or i128 %shl.i39, %conv.i
+ %arrayidx3 = getelementptr inbounds i64, i64* %z, i64 1
+ %2 = load i64, i64* %arrayidx3, align 4
+ %conv.i37 = zext i64 %2 to i128
+ %arrayidx5 = getelementptr inbounds i64, i64* %z, i64 3
+ %3 = load i64, i64* %arrayidx5, align 4
+ %conv.i35 = zext i64 %3 to i128
+ %shl.i36 = shl nuw i128 %conv.i35, 64
+ %or7 = or i128 %shl.i36, %conv.i37
+ %arrayidx10 = getelementptr inbounds i64, i64* %z, i64 4
+ %4 = load i64, i64* %arrayidx10, align 4
+ %conv.i64 = zext i64 %4 to i128
+ %shl.i33 = shl nuw i128 %conv.i64, 64
+ %or12 = or i128 %shl.i33, %conv.i
+ %arrayidx15 = getelementptr inbounds i64, i64* %z, i64 5
+ %5 = load i64, i64* %arrayidx15, align 4
+ %conv.i30 = zext i64 %5 to i128
+ %shl.i = shl nuw i128 %conv.i30, 64
+ %or17 = or i128 %shl.i, %conv.i37
+ %add = add i128 %or7, %or
+ %add18 = add i128 %or17, %or12
+ %mul = mul i128 %add18, %add
+ ret i128 %mul
+}
+; CHECK: adds r4, r2, r7
+; CHECK: mov r4, r1
+; CHECK: adcs r4, r6
+; CHECK: ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK: adcs r5, r4
+; CHECK: ldr r4, [sp, #24] @ 4-byte Reload
+; CHECK: adcs r3, r4
+; CHECK: adds r4, r2, r7
+; CHECK: adcs r1, r6
+; CHECK: mov r2, sp
+; CHECK: str r4, [r2]
+; CHECK: str r1, [r2, #4]
+; CHECK: ldr r6, [r0, #16]
+; CHECK: ldr r7, [r0, #24]
+; CHECK: adcs r7, r6
+; CHECK: str r7, [r2, #8]
+; CHECK: ldr r6, [r0, #20]
+; CHECK: ldr r0, [r0, #28]
+; CHECK: adcs r0, r6
diff --git a/test/CodeGen/X86/O0-pipeline.ll b/test/CodeGen/X86/O0-pipeline.ll
index cb7dabefe45a..3a720a5288a2 100644
--- a/test/CodeGen/X86/O0-pipeline.ll
+++ b/test/CodeGen/X86/O0-pipeline.ll
@@ -25,6 +25,7 @@
; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
; CHECK-NEXT: Scalarize Masked Memory Intrinsics
; CHECK-NEXT: Expand reduction intrinsics
+; CHECK-NEXT: Expand indirectbr instructions
; CHECK-NEXT: Rewrite Symbols
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
@@ -57,6 +58,7 @@
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: Insert XRay ops
; CHECK-NEXT: Implement the 'patchable-function' attribute
+; CHECK-NEXT: X86 Retpoline Thunks
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: MachineDominator Tree Construction
diff --git a/test/CodeGen/X86/retpoline-external.ll b/test/CodeGen/X86/retpoline-external.ll
new file mode 100644
index 000000000000..66d32ba5d73d
--- /dev/null
+++ b/test/CodeGen/X86/retpoline-external.ll
@@ -0,0 +1,166 @@
+; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
+; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
+
+; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
+; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
+
+declare void @bar(i32)
+
+; Test a simple indirect call and tail call.
+define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
+entry:
+ tail call void @bar(i32 %x)
+ tail call void %fp(i32 %x)
+ tail call void @bar(i32 %x)
+ tail call void %fp(i32 %x)
+ ret void
+}
+
+; X64-LABEL: icall_reg:
+; X64-DAG: movq %rdi, %[[fp:[^ ]*]]
+; X64-DAG: movl %esi, %[[x:[^ ]*]]
+; X64: movl %[[x]], %edi
+; X64: callq bar
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq %[[fp]], %r11
+; X64: callq __llvm_external_retpoline_r11
+; X64: movl %[[x]], %edi
+; X64: callq bar
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq %[[fp]], %r11
+; X64: jmp __llvm_external_retpoline_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_reg:
+; X64FAST: callq bar
+; X64FAST: callq __llvm_external_retpoline_r11
+; X64FAST: callq bar
+; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL
+
+; X86-LABEL: icall_reg:
+; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]
+; X86-DAG: movl 16(%esp), %[[x:[^ ]*]]
+; X86: pushl %[[x]]
+; X86: calll bar
+; X86: movl %[[fp]], %eax
+; X86: pushl %[[x]]
+; X86: calll __llvm_external_retpoline_eax
+; X86: pushl %[[x]]
+; X86: calll bar
+; X86: movl %[[fp]], %eax
+; X86: pushl %[[x]]
+; X86: calll __llvm_external_retpoline_eax
+; X86-NOT: # TAILCALL
+
+; X86FAST-LABEL: icall_reg:
+; X86FAST: calll bar
+; X86FAST: calll __llvm_external_retpoline_eax
+; X86FAST: calll bar
+; X86FAST: calll __llvm_external_retpoline_eax
+
+
+@global_fp = external global void (i32)*
+
+; Test an indirect call through a global variable.
+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
+ %fp1 = load void (i32)*, void (i32)** @global_fp
+ call void %fp1(i32 %x)
+ %fp2 = load void (i32)*, void (i32)** @global_fp
+ tail call void %fp2(i32 %x)
+ ret void
+}
+
+; X64-LABEL: icall_global_fp:
+; X64-DAG: movl %edi, %[[x:[^ ]*]]
+; X64-DAG: movq global_fp(%rip), %r11
+; X64: callq __llvm_external_retpoline_r11
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq global_fp(%rip), %r11
+; X64: jmp __llvm_external_retpoline_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_global_fp:
+; X64FAST: movq global_fp(%rip), %r11
+; X64FAST: callq __llvm_external_retpoline_r11
+; X64FAST: movq global_fp(%rip), %r11
+; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL
+
+; X86-LABEL: icall_global_fp:
+; X86: movl global_fp, %eax
+; X86: pushl 4(%esp)
+; X86: calll __llvm_external_retpoline_eax
+; X86: addl $4, %esp
+; X86: movl global_fp, %eax
+; X86: jmp __llvm_external_retpoline_eax # TAILCALL
+
+; X86FAST-LABEL: icall_global_fp:
+; X86FAST: calll __llvm_external_retpoline_eax
+; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL
+
+
+%struct.Foo = type { void (%struct.Foo*)** }
+
+; Test an indirect call through a vtable.
+define void @vcall(%struct.Foo* %obj) #0 {
+ %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
+ %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
+ %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
+ %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
+ tail call void %fp(%struct.Foo* %obj)
+ tail call void %fp(%struct.Foo* %obj)
+ ret void
+}
+
+; X64-LABEL: vcall:
+; X64: movq %rdi, %[[obj:[^ ]*]]
+; X64: movq (%[[obj]]), %[[vptr:[^ ]*]]
+; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
+; X64: movq %[[fp]], %r11
+; X64: callq __llvm_external_retpoline_r11
+; X64-DAG: movq %[[obj]], %rdi
+; X64-DAG: movq %[[fp]], %r11
+; X64: jmp __llvm_external_retpoline_r11 # TAILCALL
+
+; X64FAST-LABEL: vcall:
+; X64FAST: callq __llvm_external_retpoline_r11
+; X64FAST: jmp __llvm_external_retpoline_r11 # TAILCALL
+
+; X86-LABEL: vcall:
+; X86: movl 8(%esp), %[[obj:[^ ]*]]
+; X86: movl (%[[obj]]), %[[vptr:[^ ]*]]
+; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]
+; X86: movl %[[fp]], %eax
+; X86: pushl %[[obj]]
+; X86: calll __llvm_external_retpoline_eax
+; X86: addl $4, %esp
+; X86: movl %[[fp]], %eax
+; X86: jmp __llvm_external_retpoline_eax # TAILCALL
+
+; X86FAST-LABEL: vcall:
+; X86FAST: calll __llvm_external_retpoline_eax
+; X86FAST: jmp __llvm_external_retpoline_eax # TAILCALL
+
+
+declare void @direct_callee()
+
+define void @direct_tail() #0 {
+ tail call void @direct_callee()
+ ret void
+}
+
+; X64-LABEL: direct_tail:
+; X64: jmp direct_callee # TAILCALL
+; X64FAST-LABEL: direct_tail:
+; X64FAST: jmp direct_callee # TAILCALL
+; X86-LABEL: direct_tail:
+; X86: jmp direct_callee # TAILCALL
+; X86FAST-LABEL: direct_tail:
+; X86FAST: jmp direct_callee # TAILCALL
+
+
+; Lastly check that no thunks were emitted.
+; X64-NOT: __{{.*}}_retpoline_{{.*}}:
+; X64FAST-NOT: __{{.*}}_retpoline_{{.*}}:
+; X86-NOT: __{{.*}}_retpoline_{{.*}}:
+; X86FAST-NOT: __{{.*}}_retpoline_{{.*}}:
+
+
+attributes #0 = { "target-features"="+retpoline-external-thunk" }
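
The retpoline-external.ll test above checks that, with "+retpoline-external-thunk", the compiler only references __llvm_external_retpoline_r11 / __llvm_external_retpoline_eax and emits no thunk bodies, leaving them to be supplied by the surrounding environment (for example, a kernel). As a hedged illustration, an externally supplied x86-64 thunk would conventionally follow the same pattern that the non-external retpoline.ll test below checks for; the label names are assumptions:

    __llvm_external_retpoline_r11:      # caller has placed the real target in %r11
            callq   .Lcapture           # push a benign return address
    .Lspeculate:
            pause                       # speculative execution of the return spins here
            lfence
            jmp     .Lspeculate
    .Lcapture:
            movq    %r11, (%rsp)        # overwrite the return address with the real target
            retq                        # "return" transfers control to the intended callee
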
diff --git a/test/CodeGen/X86/retpoline.ll b/test/CodeGen/X86/retpoline.ll
new file mode 100644
index 000000000000..57d3388b812a
--- /dev/null
+++ b/test/CodeGen/X86/retpoline.ll
@@ -0,0 +1,367 @@
+; RUN: llc -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
+; RUN: llc -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST
+
+; RUN: llc -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
+; RUN: llc -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
+
+declare void @bar(i32)
+
+; Test a simple indirect call and tail call.
+define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
+entry:
+ tail call void @bar(i32 %x)
+ tail call void %fp(i32 %x)
+ tail call void @bar(i32 %x)
+ tail call void %fp(i32 %x)
+ ret void
+}
+
+; X64-LABEL: icall_reg:
+; X64-DAG: movq %rdi, %[[fp:[^ ]*]]
+; X64-DAG: movl %esi, %[[x:[^ ]*]]
+; X64: movl %[[x]], %edi
+; X64: callq bar
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq %[[fp]], %r11
+; X64: callq __llvm_retpoline_r11
+; X64: movl %[[x]], %edi
+; X64: callq bar
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq %[[fp]], %r11
+; X64: jmp __llvm_retpoline_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_reg:
+; X64FAST: callq bar
+; X64FAST: callq __llvm_retpoline_r11
+; X64FAST: callq bar
+; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
+
+; X86-LABEL: icall_reg:
+; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]
+; X86-DAG: movl 16(%esp), %[[x:[^ ]*]]
+; X86: pushl %[[x]]
+; X86: calll bar
+; X86: movl %[[fp]], %eax
+; X86: pushl %[[x]]
+; X86: calll __llvm_retpoline_eax
+; X86: pushl %[[x]]
+; X86: calll bar
+; X86: movl %[[fp]], %eax
+; X86: pushl %[[x]]
+; X86: calll __llvm_retpoline_eax
+; X86-NOT: # TAILCALL
+
+; X86FAST-LABEL: icall_reg:
+; X86FAST: calll bar
+; X86FAST: calll __llvm_retpoline_eax
+; X86FAST: calll bar
+; X86FAST: calll __llvm_retpoline_eax
+
+
+@global_fp = external global void (i32)*
+
+; Test an indirect call through a global variable.
+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
+ %fp1 = load void (i32)*, void (i32)** @global_fp
+ call void %fp1(i32 %x)
+ %fp2 = load void (i32)*, void (i32)** @global_fp
+ tail call void %fp2(i32 %x)
+ ret void
+}
+
+; X64-LABEL: icall_global_fp:
+; X64-DAG: movl %edi, %[[x:[^ ]*]]
+; X64-DAG: movq global_fp(%rip), %r11
+; X64: callq __llvm_retpoline_r11
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq global_fp(%rip), %r11
+; X64: jmp __llvm_retpoline_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_global_fp:
+; X64FAST: movq global_fp(%rip), %r11
+; X64FAST: callq __llvm_retpoline_r11
+; X64FAST: movq global_fp(%rip), %r11
+; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
+
+; X86-LABEL: icall_global_fp:
+; X86: movl global_fp, %eax
+; X86: pushl 4(%esp)
+; X86: calll __llvm_retpoline_eax
+; X86: addl $4, %esp
+; X86: movl global_fp, %eax
+; X86: jmp __llvm_retpoline_eax # TAILCALL
+
+; X86FAST-LABEL: icall_global_fp:
+; X86FAST: calll __llvm_retpoline_eax
+; X86FAST: jmp __llvm_retpoline_eax # TAILCALL
+
+
+%struct.Foo = type { void (%struct.Foo*)** }
+
+; Test an indirect call through a vtable.
+define void @vcall(%struct.Foo* %obj) #0 {
+ %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
+ %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
+ %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
+ %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
+ tail call void %fp(%struct.Foo* %obj)
+ tail call void %fp(%struct.Foo* %obj)
+ ret void
+}
+
+; X64-LABEL: vcall:
+; X64: movq %rdi, %[[obj:[^ ]*]]
+; X64: movq (%[[obj]]), %[[vptr:[^ ]*]]
+; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
+; X64: movq %[[fp]], %r11
+; X64: callq __llvm_retpoline_r11
+; X64-DAG: movq %[[obj]], %rdi
+; X64-DAG: movq %[[fp]], %r11
+; X64: jmp __llvm_retpoline_r11 # TAILCALL
+
+; X64FAST-LABEL: vcall:
+; X64FAST: callq __llvm_retpoline_r11
+; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
+
+; X86-LABEL: vcall:
+; X86: movl 8(%esp), %[[obj:[^ ]*]]
+; X86: movl (%[[obj]]), %[[vptr:[^ ]*]]
+; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]
+; X86: movl %[[fp]], %eax
+; X86: pushl %[[obj]]
+; X86: calll __llvm_retpoline_eax
+; X86: addl $4, %esp
+; X86: movl %[[fp]], %eax
+; X86: jmp __llvm_retpoline_eax # TAILCALL
+
+; X86FAST-LABEL: vcall:
+; X86FAST: calll __llvm_retpoline_eax
+; X86FAST: jmp __llvm_retpoline_eax # TAILCALL
+
+
+declare void @direct_callee()
+
+define void @direct_tail() #0 {
+ tail call void @direct_callee()
+ ret void
+}
+
+; X64-LABEL: direct_tail:
+; X64: jmp direct_callee # TAILCALL
+; X64FAST-LABEL: direct_tail:
+; X64FAST: jmp direct_callee # TAILCALL
+; X86-LABEL: direct_tail:
+; X86: jmp direct_callee # TAILCALL
+; X86FAST-LABEL: direct_tail:
+; X86FAST: jmp direct_callee # TAILCALL
+
+
+declare void @nonlazybind_callee() #1
+
+define void @nonlazybind_caller() #0 {
+ call void @nonlazybind_callee()
+ tail call void @nonlazybind_callee()
+ ret void
+}
+
+; X64-LABEL: nonlazybind_caller:
+; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
+; X64: movq %[[REG]], %r11
+; X64: callq __llvm_retpoline_r11
+; X64: movq %[[REG]], %r11
+; X64: jmp __llvm_retpoline_r11 # TAILCALL
+; X64FAST-LABEL: nonlazybind_caller:
+; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST: callq __llvm_retpoline_r11
+; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
+; X86-LABEL: nonlazybind_caller:
+; X86: calll nonlazybind_callee@PLT
+; X86: jmp nonlazybind_callee@PLT # TAILCALL
+; X86FAST-LABEL: nonlazybind_caller:
+; X86FAST: calll nonlazybind_callee@PLT
+; X86FAST: jmp nonlazybind_callee@PLT # TAILCALL
+
+
+@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
+ i8* blockaddress(@indirectbr_rewrite, %bb1),
+ i8* blockaddress(@indirectbr_rewrite, %bb2),
+ i8* blockaddress(@indirectbr_rewrite, %bb3),
+ i8* blockaddress(@indirectbr_rewrite, %bb4),
+ i8* blockaddress(@indirectbr_rewrite, %bb5),
+ i8* blockaddress(@indirectbr_rewrite, %bb6),
+ i8* blockaddress(@indirectbr_rewrite, %bb7),
+ i8* blockaddress(@indirectbr_rewrite, %bb8),
+ i8* blockaddress(@indirectbr_rewrite, %bb9)]
+
+; Check that when retpolines are enabled a function with indirectbr gets
+; rewritten to use switch, and that in turn doesn't get lowered as a jump
+; table.
+define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 {
+; X64-LABEL: indirectbr_rewrite:
+; X64-NOT: jmpq
+; X86-LABEL: indirectbr_rewrite:
+; X86-NOT: jmpl
+entry:
+ %i0 = load i64, i64* %p
+ %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
+ %target0 = load i8*, i8** %target.i0
+ indirectbr i8* %target0, [label %bb1, label %bb3]
+
+bb0:
+ store volatile i64 0, i64* %sink
+ br label %latch
+
+bb1:
+ store volatile i64 1, i64* %sink
+ br label %latch
+
+bb2:
+ store volatile i64 2, i64* %sink
+ br label %latch
+
+bb3:
+ store volatile i64 3, i64* %sink
+ br label %latch
+
+bb4:
+ store volatile i64 4, i64* %sink
+ br label %latch
+
+bb5:
+ store volatile i64 5, i64* %sink
+ br label %latch
+
+bb6:
+ store volatile i64 6, i64* %sink
+ br label %latch
+
+bb7:
+ store volatile i64 7, i64* %sink
+ br label %latch
+
+bb8:
+ store volatile i64 8, i64* %sink
+ br label %latch
+
+bb9:
+ store volatile i64 9, i64* %sink
+ br label %latch
+
+latch:
+ %i.next = load i64, i64* %p
+ %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
+ %target.next = load i8*, i8** %target.i.next
+ ; Potentially hit a full 10 successors here so that even if we rewrite as
+ ; a switch it will try to be lowered with a jump table.
+ indirectbr i8* %target.next, [label %bb0,
+ label %bb1,
+ label %bb2,
+ label %bb3,
+ label %bb4,
+ label %bb5,
+ label %bb6,
+ label %bb7,
+ label %bb8,
+ label %bb9]
+}
+
+; Lastly check that the necessary thunks were emitted.
+;
+; X64-LABEL: .section .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat
+; X64-NEXT: .hidden __llvm_retpoline_r11
+; X64-NEXT: .weak __llvm_retpoline_r11
+; X64: __llvm_retpoline_r11:
+; X64-NEXT: # {{.*}} # %entry
+; X64-NEXT: callq [[CALL_TARGET:.*]]
+; X64-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
+; X64-NEXT: # %entry
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: pause
+; X64-NEXT: lfence
+; X64-NEXT: jmp [[CAPTURE_SPEC]]
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: [[CALL_TARGET]]: # Block address taken
+; X64-NEXT: # %entry
+; X64-NEXT: movq %r11, (%rsp)
+; X64-NEXT: retq
+;
+; X86-LABEL: .section .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat
+; X86-NEXT: .hidden __llvm_retpoline_eax
+; X86-NEXT: .weak __llvm_retpoline_eax
+; X86: __llvm_retpoline_eax:
+; X86-NEXT: # {{.*}} # %entry
+; X86-NEXT: calll [[CALL_TARGET:.*]]
+; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
+; X86-NEXT: # %entry
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: pause
+; X86-NEXT: lfence
+; X86-NEXT: jmp [[CAPTURE_SPEC]]
+; X86-NEXT: .p2align 4, 0x90
+; X86-NEXT: [[CALL_TARGET]]: # Block address taken
+; X86-NEXT: # %entry
+; X86-NEXT: movl %eax, (%esp)
+; X86-NEXT: retl
+;
+; X86-LABEL: .section .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat
+; X86-NEXT: .hidden __llvm_retpoline_ecx
+; X86-NEXT: .weak __llvm_retpoline_ecx
+; X86: __llvm_retpoline_ecx:
+; X86-NEXT: # {{.*}} # %entry
+; X86-NEXT: calll [[CALL_TARGET:.*]]
+; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
+; X86-NEXT: # %entry
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: pause
+; X86-NEXT: lfence
+; X86-NEXT: jmp [[CAPTURE_SPEC]]
+; X86-NEXT: .p2align 4, 0x90
+; X86-NEXT: [[CALL_TARGET]]: # Block address taken
+; X86-NEXT: # %entry
+; X86-NEXT: movl %ecx, (%esp)
+; X86-NEXT: retl
+;
+; X86-LABEL: .section .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat
+; X86-NEXT: .hidden __llvm_retpoline_edx
+; X86-NEXT: .weak __llvm_retpoline_edx
+; X86: __llvm_retpoline_edx:
+; X86-NEXT: # {{.*}} # %entry
+; X86-NEXT: calll [[CALL_TARGET:.*]]
+; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
+; X86-NEXT: # %entry
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: pause
+; X86-NEXT: lfence
+; X86-NEXT: jmp [[CAPTURE_SPEC]]
+; X86-NEXT: .p2align 4, 0x90
+; X86-NEXT: [[CALL_TARGET]]: # Block address taken
+; X86-NEXT: # %entry
+; X86-NEXT: movl %edx, (%esp)
+; X86-NEXT: retl
+;
+; X86-LABEL: .section .text.__llvm_retpoline_push,{{.*}},__llvm_retpoline_push,comdat
+; X86-NEXT: .hidden __llvm_retpoline_push
+; X86-NEXT: .weak __llvm_retpoline_push
+; X86: __llvm_retpoline_push:
+; X86-NEXT: # {{.*}} # %entry
+; X86-NEXT: calll [[CALL_TARGET:.*]]
+; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken
+; X86-NEXT: # %entry
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: pause
+; X86-NEXT: lfence
+; X86-NEXT: jmp [[CAPTURE_SPEC]]
+; X86-NEXT: .p2align 4, 0x90
+; X86-NEXT: [[CALL_TARGET]]: # Block address taken
+; X86-NEXT: # %entry
+; X86-NEXT: addl $4, %esp
+; X86-NEXT: pushl 4(%esp)
+; X86-NEXT: pushl 4(%esp)
+; X86-NEXT: popl 8(%esp)
+; X86-NEXT: popl (%esp)
+; X86-NEXT: retl
+
+
+attributes #0 = { "target-features"="+retpoline" }
+attributes #1 = { nonlazybind }