aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/cmpxchg-clobber-flags.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/cmpxchg-clobber-flags.ll')
-rw-r--r-- test/CodeGen/X86/cmpxchg-clobber-flags.ll 363
1 files changed, 214 insertions, 149 deletions
diff --git a/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/test/CodeGen/X86/cmpxchg-clobber-flags.ll
index 8d289fa9fb03..827aba78699c 100644
--- a/test/CodeGen/X86/cmpxchg-clobber-flags.ll
+++ b/test/CodeGen/X86/cmpxchg-clobber-flags.ll
@@ -1,100 +1,110 @@
-; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386
-; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
+; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA
-; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664
-; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664
-; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s -check-prefix=x8664-sahf
-; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664-sahf
-; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=corei7 %s -o - | FileCheck %s -check-prefix=x8664-sahf
-
-; TODO: Reenable verify-machineinstr once the if (!AXDead) // FIXME
-; in X86InstrInfo::copyPhysReg() is resolved.
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
declare i32 @foo()
declare i32 @bar(i64)
-define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) {
-; i386-LABEL: test_intervening_call:
-; i386: cmpxchg8b
-; i386-NEXT: pushl %eax
-; i386-NEXT: seto %al
-; i386-NEXT: lahf
-; i386-NEXT: movl %eax, [[FLAGS:%.*]]
-; i386-NEXT: popl %eax
-; i386-NEXT: subl $8, %esp
-; i386-NEXT: pushl %edx
-; i386-NEXT: pushl %eax
-; i386-NEXT: calll bar
-; i386-NEXT: addl $16, %esp
-; i386-NEXT: movl [[FLAGS]], %eax
-; i386-NEXT: addb $127, %al
-; i386-NEXT: sahf
-; i386-NEXT: jne
-
-; In the following case we get a long chain of EFLAGS save/restore due to
-; a sequence of:
+; In the following case when using fast scheduling we get a long chain of
+; EFLAGS save/restore due to a sequence of:
; cmpxchg8b (implicit-def eflags)
; eax = copy eflags
; adjcallstackdown32
; ...
; use of eax
; During PEI the adjcallstackdown32 is replaced with the subl which
-; clobbers eflags, effectively interfering in the liveness interval.
-; Is this a case we care about? Maybe no, considering this issue
-; happens with the fast pre-regalloc scheduler enforced. A more
-; performant scheduler would move the adjcallstackdown32 out of the
-; eflags liveness interval.
-
-; i386f-LABEL: test_intervening_call:
-; i386f: cmpxchg8b
-; i386f-NEXT: pushl %eax
-; i386f-NEXT: seto %al
-; i386f-NEXT: lahf
-; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
-; i386f-NEXT: popl %eax
-; i386f-NEXT: subl $8, %esp
-; i386f-NEXT: pushl %eax
-; i386f-NEXT: movl %ecx, %eax
-; i386f-NEXT: addb $127, %al
-; i386f-NEXT: sahf
-; i386f-NEXT: popl %eax
-; i386f-NEXT: pushl %eax
-; i386f-NEXT: seto %al
-; i386f-NEXT: lahf
-; i386f-NEXT: movl %eax, %esi
-; i386f-NEXT: popl %eax
-; i386f-NEXT: pushl %edx
-; i386f-NEXT: pushl %eax
-; i386f-NEXT: calll bar
-; i386f-NEXT: addl $16, %esp
-; i386f-NEXT: movl %esi, %eax
-; i386f-NEXT: addb $127, %al
-
-; x8664-LABEL: test_intervening_call:
-; x8664: cmpxchgq
-; x8664: pushfq
-; x8664-NEXT: popq [[FLAGS:%.*]]
-; x8664-NEXT: movq %rax, %rdi
-; x8664-NEXT: callq bar
-; x8664-NEXT: pushq [[FLAGS]]
-; x8664-NEXT: popfq
-; x8664-NEXT: jne
-
-; x8664-sahf-LABEL: test_intervening_call:
-; x8664-sahf: cmpxchgq
-; x8664-sahf: pushq %rax
-; x8664-sahf-NEXT: seto %al
-; x8664-sahf-NEXT: lahf
-; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]]
-; x8664-sahf-NEXT: popq %rax
-; x8664-sahf-NEXT: movq %rax, %rdi
-; x8664-sahf-NEXT: callq bar
-; RAX is dead, no need to push and pop it.
-; x8664-sahf-NEXT: movq [[FLAGS]], %rax
-; x8664-sahf-NEXT: addb $127, %al
-; x8664-sahf-NEXT: sahf
-; x8664-sahf-NEXT: jne
-
+; clobbers eflags, effectively interfering in the liveness interval. However,
+; we then promote these copies into independent conditions in GPRs that avoid
+; repeated saving and restoring logic and can be trivially managed by the
+; register allocator.
+define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
+; 32-GOOD-RA-LABEL: test_intervening_call:
+; 32-GOOD-RA: # %bb.0: # %entry
+; 32-GOOD-RA-NEXT: pushl %ebx
+; 32-GOOD-RA-NEXT: pushl %esi
+; 32-GOOD-RA-NEXT: pushl %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
+; 32-GOOD-RA-NEXT: setne %bl
+; 32-GOOD-RA-NEXT: subl $8, %esp
+; 32-GOOD-RA-NEXT: pushl %edx
+; 32-GOOD-RA-NEXT: pushl %eax
+; 32-GOOD-RA-NEXT: calll bar
+; 32-GOOD-RA-NEXT: addl $16, %esp
+; 32-GOOD-RA-NEXT: testb %bl, %bl
+; 32-GOOD-RA-NEXT: jne .LBB0_3
+; 32-GOOD-RA-NEXT: # %bb.1: # %t
+; 32-GOOD-RA-NEXT: movl $42, %eax
+; 32-GOOD-RA-NEXT: jmp .LBB0_2
+; 32-GOOD-RA-NEXT: .LBB0_3: # %f
+; 32-GOOD-RA-NEXT: xorl %eax, %eax
+; 32-GOOD-RA-NEXT: .LBB0_2: # %t
+; 32-GOOD-RA-NEXT: xorl %edx, %edx
+; 32-GOOD-RA-NEXT: addl $4, %esp
+; 32-GOOD-RA-NEXT: popl %esi
+; 32-GOOD-RA-NEXT: popl %ebx
+; 32-GOOD-RA-NEXT: retl
+;
+; 32-FAST-RA-LABEL: test_intervening_call:
+; 32-FAST-RA: # %bb.0: # %entry
+; 32-FAST-RA-NEXT: pushl %ebx
+; 32-FAST-RA-NEXT: pushl %esi
+; 32-FAST-RA-NEXT: pushl %eax
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
+; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi)
+; 32-FAST-RA-NEXT: setne %bl
+; 32-FAST-RA-NEXT: subl $8, %esp
+; 32-FAST-RA-NEXT: pushl %edx
+; 32-FAST-RA-NEXT: pushl %eax
+; 32-FAST-RA-NEXT: calll bar
+; 32-FAST-RA-NEXT: addl $16, %esp
+; 32-FAST-RA-NEXT: testb %bl, %bl
+; 32-FAST-RA-NEXT: jne .LBB0_3
+; 32-FAST-RA-NEXT: # %bb.1: # %t
+; 32-FAST-RA-NEXT: movl $42, %eax
+; 32-FAST-RA-NEXT: jmp .LBB0_2
+; 32-FAST-RA-NEXT: .LBB0_3: # %f
+; 32-FAST-RA-NEXT: xorl %eax, %eax
+; 32-FAST-RA-NEXT: .LBB0_2: # %t
+; 32-FAST-RA-NEXT: xorl %edx, %edx
+; 32-FAST-RA-NEXT: addl $4, %esp
+; 32-FAST-RA-NEXT: popl %esi
+; 32-FAST-RA-NEXT: popl %ebx
+; 32-FAST-RA-NEXT: retl
+;
+; 64-ALL-LABEL: test_intervening_call:
+; 64-ALL: # %bb.0: # %entry
+; 64-ALL-NEXT: pushq %rbx
+; 64-ALL-NEXT: movq %rsi, %rax
+; 64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi)
+; 64-ALL-NEXT: setne %bl
+; 64-ALL-NEXT: movq %rax, %rdi
+; 64-ALL-NEXT: callq bar
+; 64-ALL-NEXT: testb %bl, %bl
+; 64-ALL-NEXT: jne .LBB0_2
+; 64-ALL-NEXT: # %bb.1: # %t
+; 64-ALL-NEXT: movl $42, %eax
+; 64-ALL-NEXT: popq %rbx
+; 64-ALL-NEXT: retq
+; 64-ALL-NEXT: .LBB0_2: # %f
+; 64-ALL-NEXT: xorl %eax, %eax
+; 64-ALL-NEXT: popq %rbx
+; 64-ALL-NEXT: retq
+entry:
%cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
%v = extractvalue { i64, i1 } %cx, 0
%p = extractvalue { i64, i1 } %cx, 1
@@ -109,23 +119,62 @@ f:
}
; Interesting in producing a clobber without any function calls.
-define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) {
-; i386-LABEL: test_control_flow:
-; i386: cmpxchg
-; i386-NEXT: jne
-
-; i386f-LABEL: test_control_flow:
-; i386f: cmpxchg
-; i386f-NEXT: jne
-
-; x8664-LABEL: test_control_flow:
-; x8664: cmpxchg
-; x8664-NEXT: jne
-
-; x8664-sahf-LABEL: test_control_flow:
-; x8664-sahf: cmpxchg
-; x8664-sahf-NEXT: jne
-
+define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) nounwind {
+; 32-ALL-LABEL: test_control_flow:
+; 32-ALL: # %bb.0: # %entry
+; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-ALL-NEXT: cmpl {{[0-9]+}}(%esp), %eax
+; 32-ALL-NEXT: jle .LBB1_6
+; 32-ALL-NEXT: # %bb.1: # %loop_start
+; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-ALL-NEXT: .p2align 4, 0x90
+; 32-ALL-NEXT: .LBB1_2: # %while.condthread-pre-split.i
+; 32-ALL-NEXT: # =>This Loop Header: Depth=1
+; 32-ALL-NEXT: # Child Loop BB1_3 Depth 2
+; 32-ALL-NEXT: movl (%ecx), %edx
+; 32-ALL-NEXT: .p2align 4, 0x90
+; 32-ALL-NEXT: .LBB1_3: # %while.cond.i
+; 32-ALL-NEXT: # Parent Loop BB1_2 Depth=1
+; 32-ALL-NEXT: # => This Inner Loop Header: Depth=2
+; 32-ALL-NEXT: movl %edx, %eax
+; 32-ALL-NEXT: xorl %edx, %edx
+; 32-ALL-NEXT: testl %eax, %eax
+; 32-ALL-NEXT: je .LBB1_3
+; 32-ALL-NEXT: # %bb.4: # %while.body.i
+; 32-ALL-NEXT: # in Loop: Header=BB1_2 Depth=1
+; 32-ALL-NEXT: lock cmpxchgl %eax, (%ecx)
+; 32-ALL-NEXT: jne .LBB1_2
+; 32-ALL-NEXT: # %bb.5:
+; 32-ALL-NEXT: xorl %eax, %eax
+; 32-ALL-NEXT: .LBB1_6: # %cond.end
+; 32-ALL-NEXT: retl
+;
+; 64-ALL-LABEL: test_control_flow:
+; 64-ALL: # %bb.0: # %entry
+; 64-ALL-NEXT: cmpl %edx, %esi
+; 64-ALL-NEXT: jle .LBB1_5
+; 64-ALL-NEXT: .p2align 4, 0x90
+; 64-ALL-NEXT: .LBB1_1: # %while.condthread-pre-split.i
+; 64-ALL-NEXT: # =>This Loop Header: Depth=1
+; 64-ALL-NEXT: # Child Loop BB1_2 Depth 2
+; 64-ALL-NEXT: movl (%rdi), %ecx
+; 64-ALL-NEXT: .p2align 4, 0x90
+; 64-ALL-NEXT: .LBB1_2: # %while.cond.i
+; 64-ALL-NEXT: # Parent Loop BB1_1 Depth=1
+; 64-ALL-NEXT: # => This Inner Loop Header: Depth=2
+; 64-ALL-NEXT: movl %ecx, %eax
+; 64-ALL-NEXT: xorl %ecx, %ecx
+; 64-ALL-NEXT: testl %eax, %eax
+; 64-ALL-NEXT: je .LBB1_2
+; 64-ALL-NEXT: # %bb.3: # %while.body.i
+; 64-ALL-NEXT: # in Loop: Header=BB1_1 Depth=1
+; 64-ALL-NEXT: lock cmpxchgl %eax, (%rdi)
+; 64-ALL-NEXT: jne .LBB1_1
+; 64-ALL-NEXT: # %bb.4:
+; 64-ALL-NEXT: xorl %esi, %esi
+; 64-ALL-NEXT: .LBB1_5: # %cond.end
+; 64-ALL-NEXT: movl %esi, %eax
+; 64-ALL-NEXT: retq
entry:
%cmp = icmp sgt i32 %i, %j
br i1 %cmp, label %loop_start, label %cond.end
@@ -158,52 +207,68 @@ cond.end:
; This one is an interesting case because CMOV doesn't have a chain
; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
-define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) {
-; i386-LABEL: test_feed_cmov:
-; i386: cmpxchgl
-; i386-NEXT: seto %al
-; i386-NEXT: lahf
-; i386-NEXT: movl %eax, [[FLAGS:%.*]]
-; i386-NEXT: calll foo
-; i386-NEXT: pushl %eax
-; i386-NEXT: movl [[FLAGS]], %eax
-; i386-NEXT: addb $127, %al
-; i386-NEXT: sahf
-; i386-NEXT: popl %eax
-
-; i386f-LABEL: test_feed_cmov:
-; i386f: cmpxchgl
-; i386f-NEXT: seto %al
-; i386f-NEXT: lahf
-; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
-; i386f-NEXT: calll foo
-; i386f-NEXT: pushl %eax
-; i386f-NEXT: movl [[FLAGS]], %eax
-; i386f-NEXT: addb $127, %al
-; i386f-NEXT: sahf
-; i386f-NEXT: popl %eax
-
-; x8664-LABEL: test_feed_cmov:
-; x8664: cmpxchg
-; x8664: pushfq
-; x8664-NEXT: popq [[FLAGS:%.*]]
-; x8664-NEXT: callq foo
-; x8664-NEXT: pushq [[FLAGS]]
-; x8664-NEXT: popfq
-
-; x8664-sahf-LABEL: test_feed_cmov:
-; x8664-sahf: cmpxchgl
-; RAX is dead, do not push or pop it.
-; x8664-sahf-NEXT: seto %al
-; x8664-sahf-NEXT: lahf
-; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]]
-; x8664-sahf-NEXT: callq foo
-; x8664-sahf-NEXT: pushq %rax
-; x8664-sahf-NEXT: movq [[FLAGS]], %rax
-; x8664-sahf-NEXT: addb $127, %al
-; x8664-sahf-NEXT: sahf
-; x8664-sahf-NEXT: popq %rax
-
+define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind {
+; 32-GOOD-RA-LABEL: test_feed_cmov:
+; 32-GOOD-RA: # %bb.0: # %entry
+; 32-GOOD-RA-NEXT: pushl %ebx
+; 32-GOOD-RA-NEXT: pushl %esi
+; 32-GOOD-RA-NEXT: pushl %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx)
+; 32-GOOD-RA-NEXT: sete %bl
+; 32-GOOD-RA-NEXT: calll foo
+; 32-GOOD-RA-NEXT: testb %bl, %bl
+; 32-GOOD-RA-NEXT: jne .LBB2_2
+; 32-GOOD-RA-NEXT: # %bb.1: # %entry
+; 32-GOOD-RA-NEXT: movl %eax, %esi
+; 32-GOOD-RA-NEXT: .LBB2_2: # %entry
+; 32-GOOD-RA-NEXT: movl %esi, %eax
+; 32-GOOD-RA-NEXT: addl $4, %esp
+; 32-GOOD-RA-NEXT: popl %esi
+; 32-GOOD-RA-NEXT: popl %ebx
+; 32-GOOD-RA-NEXT: retl
+;
+; 32-FAST-RA-LABEL: test_feed_cmov:
+; 32-FAST-RA: # %bb.0: # %entry
+; 32-FAST-RA-NEXT: pushl %ebx
+; 32-FAST-RA-NEXT: pushl %esi
+; 32-FAST-RA-NEXT: pushl %eax
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
+; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx)
+; 32-FAST-RA-NEXT: sete %bl
+; 32-FAST-RA-NEXT: calll foo
+; 32-FAST-RA-NEXT: testb %bl, %bl
+; 32-FAST-RA-NEXT: jne .LBB2_2
+; 32-FAST-RA-NEXT: # %bb.1: # %entry
+; 32-FAST-RA-NEXT: movl %eax, %esi
+; 32-FAST-RA-NEXT: .LBB2_2: # %entry
+; 32-FAST-RA-NEXT: movl %esi, %eax
+; 32-FAST-RA-NEXT: addl $4, %esp
+; 32-FAST-RA-NEXT: popl %esi
+; 32-FAST-RA-NEXT: popl %ebx
+; 32-FAST-RA-NEXT: retl
+;
+; 64-ALL-LABEL: test_feed_cmov:
+; 64-ALL: # %bb.0: # %entry
+; 64-ALL-NEXT: pushq %rbp
+; 64-ALL-NEXT: pushq %rbx
+; 64-ALL-NEXT: pushq %rax
+; 64-ALL-NEXT: movl %edx, %ebx
+; 64-ALL-NEXT: movl %esi, %eax
+; 64-ALL-NEXT: lock cmpxchgl %ebx, (%rdi)
+; 64-ALL-NEXT: sete %bpl
+; 64-ALL-NEXT: callq foo
+; 64-ALL-NEXT: testb %bpl, %bpl
+; 64-ALL-NEXT: cmovnel %ebx, %eax
+; 64-ALL-NEXT: addq $8, %rsp
+; 64-ALL-NEXT: popq %rbx
+; 64-ALL-NEXT: popq %rbp
+; 64-ALL-NEXT: retq
+entry:
%res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
%success = extractvalue { i32, i1 } %res, 1