diff options
Diffstat (limited to 'test/CodeGen/X86/cmpxchg-clobber-flags.ll')
-rw-r--r-- | test/CodeGen/X86/cmpxchg-clobber-flags.ll | 363 |
1 files changed, 214 insertions, 149 deletions
diff --git a/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/test/CodeGen/X86/cmpxchg-clobber-flags.ll index 8d289fa9fb03..827aba78699c 100644 --- a/test/CodeGen/X86/cmpxchg-clobber-flags.ll +++ b/test/CodeGen/X86/cmpxchg-clobber-flags.ll @@ -1,100 +1,110 @@ -; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386 -; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA +; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA -; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664 -; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664 -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s -check-prefix=x8664-sahf -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664-sahf -; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=corei7 %s -o - | FileCheck %s -check-prefix=x8664-sahf - -; TODO: Reenable verify-machineinstr once the if (!AXDead) // FIXME -; in X86InstrInfo::copyPhysReg() is resolved. +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF declare i32 @foo() declare i32 @bar(i64) -define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) { -; i386-LABEL: test_intervening_call: -; i386: cmpxchg8b -; i386-NEXT: pushl %eax -; i386-NEXT: seto %al -; i386-NEXT: lahf -; i386-NEXT: movl %eax, [[FLAGS:%.*]] -; i386-NEXT: popl %eax -; i386-NEXT: subl $8, %esp -; i386-NEXT: pushl %edx -; i386-NEXT: pushl %eax -; i386-NEXT: calll bar -; i386-NEXT: addl $16, %esp -; i386-NEXT: movl [[FLAGS]], %eax -; i386-NEXT: addb $127, %al -; i386-NEXT: sahf -; i386-NEXT: jne - -; In the following case we get a long chain of EFLAGS save/restore due to -; a sequence of: +; In the following case when using fast scheduling we get a long chain of +; EFLAGS save/restore due to a sequence of: ; cmpxchg8b (implicit-def eflags) ; eax = copy eflags ; adjcallstackdown32 ; ... ; use of eax ; During PEI the adjcallstackdown32 is replaced with the subl which -; clobbers eflags, effectively interfering in the liveness interval. -; Is this a case we care about? Maybe no, considering this issue -; happens with the fast pre-regalloc scheduler enforced. A more -; performant scheduler would move the adjcallstackdown32 out of the -; eflags liveness interval. - -; i386f-LABEL: test_intervening_call: -; i386f: cmpxchg8b -; i386f-NEXT: pushl %eax -; i386f-NEXT: seto %al -; i386f-NEXT: lahf -; i386f-NEXT: movl %eax, [[FLAGS:%.*]] -; i386f-NEXT: popl %eax -; i386f-NEXT: subl $8, %esp -; i386f-NEXT: pushl %eax -; i386f-NEXT: movl %ecx, %eax -; i386f-NEXT: addb $127, %al -; i386f-NEXT: sahf -; i386f-NEXT: popl %eax -; i386f-NEXT: pushl %eax -; i386f-NEXT: seto %al -; i386f-NEXT: lahf -; i386f-NEXT: movl %eax, %esi -; i386f-NEXT: popl %eax -; i386f-NEXT: pushl %edx -; i386f-NEXT: pushl %eax -; i386f-NEXT: calll bar -; i386f-NEXT: addl $16, %esp -; i386f-NEXT: movl %esi, %eax -; i386f-NEXT: addb $127, %al - -; x8664-LABEL: test_intervening_call: -; x8664: cmpxchgq -; x8664: pushfq -; x8664-NEXT: popq [[FLAGS:%.*]] -; x8664-NEXT: movq %rax, %rdi -; x8664-NEXT: callq bar -; x8664-NEXT: pushq [[FLAGS]] -; x8664-NEXT: popfq -; x8664-NEXT: jne - -; x8664-sahf-LABEL: test_intervening_call: -; x8664-sahf: cmpxchgq -; x8664-sahf: pushq %rax -; x8664-sahf-NEXT: seto %al -; x8664-sahf-NEXT: lahf -; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]] -; x8664-sahf-NEXT: popq %rax -; x8664-sahf-NEXT: movq %rax, %rdi -; x8664-sahf-NEXT: callq bar -; RAX is dead, no need to push and pop it. -; x8664-sahf-NEXT: movq [[FLAGS]], %rax -; x8664-sahf-NEXT: addb $127, %al -; x8664-sahf-NEXT: sahf -; x8664-sahf-NEXT: jne - +; clobbers eflags, effectively interfering in the liveness interval. However, +; we then promote these copies into independent conditions in GPRs that avoids +; repeated saving and restoring logic and can be trivially managed by the +; register allocator. +define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind { +; 32-GOOD-RA-LABEL: test_intervening_call: +; 32-GOOD-RA: # %bb.0: # %entry +; 32-GOOD-RA-NEXT: pushl %ebx +; 32-GOOD-RA-NEXT: pushl %esi +; 32-GOOD-RA-NEXT: pushl %eax +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi +; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi) +; 32-GOOD-RA-NEXT: setne %bl +; 32-GOOD-RA-NEXT: subl $8, %esp +; 32-GOOD-RA-NEXT: pushl %edx +; 32-GOOD-RA-NEXT: pushl %eax +; 32-GOOD-RA-NEXT: calll bar +; 32-GOOD-RA-NEXT: addl $16, %esp +; 32-GOOD-RA-NEXT: testb %bl, %bl +; 32-GOOD-RA-NEXT: jne .LBB0_3 +; 32-GOOD-RA-NEXT: # %bb.1: # %t +; 32-GOOD-RA-NEXT: movl $42, %eax +; 32-GOOD-RA-NEXT: jmp .LBB0_2 +; 32-GOOD-RA-NEXT: .LBB0_3: # %f +; 32-GOOD-RA-NEXT: xorl %eax, %eax +; 32-GOOD-RA-NEXT: .LBB0_2: # %t +; 32-GOOD-RA-NEXT: xorl %edx, %edx +; 32-GOOD-RA-NEXT: addl $4, %esp +; 32-GOOD-RA-NEXT: popl %esi +; 32-GOOD-RA-NEXT: popl %ebx +; 32-GOOD-RA-NEXT: retl +; +; 32-FAST-RA-LABEL: test_intervening_call: +; 32-FAST-RA: # %bb.0: # %entry +; 32-FAST-RA-NEXT: pushl %ebx +; 32-FAST-RA-NEXT: pushl %esi +; 32-FAST-RA-NEXT: pushl %eax +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx +; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi) +; 32-FAST-RA-NEXT: setne %bl +; 32-FAST-RA-NEXT: subl $8, %esp +; 32-FAST-RA-NEXT: pushl %edx +; 32-FAST-RA-NEXT: pushl %eax +; 32-FAST-RA-NEXT: calll bar +; 32-FAST-RA-NEXT: addl $16, %esp +; 32-FAST-RA-NEXT: testb %bl, %bl +; 32-FAST-RA-NEXT: jne .LBB0_3 +; 32-FAST-RA-NEXT: # %bb.1: # %t +; 32-FAST-RA-NEXT: movl $42, %eax +; 32-FAST-RA-NEXT: jmp .LBB0_2 +; 32-FAST-RA-NEXT: .LBB0_3: # %f +; 32-FAST-RA-NEXT: xorl %eax, %eax +; 32-FAST-RA-NEXT: .LBB0_2: # %t +; 32-FAST-RA-NEXT: xorl %edx, %edx +; 32-FAST-RA-NEXT: addl $4, %esp +; 32-FAST-RA-NEXT: popl %esi +; 32-FAST-RA-NEXT: popl %ebx +; 32-FAST-RA-NEXT: retl +; +; 64-ALL-LABEL: test_intervening_call: +; 64-ALL: # %bb.0: # %entry +; 64-ALL-NEXT: pushq %rbx +; 64-ALL-NEXT: movq %rsi, %rax +; 64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi) +; 64-ALL-NEXT: setne %bl +; 64-ALL-NEXT: movq %rax, %rdi +; 64-ALL-NEXT: callq bar +; 64-ALL-NEXT: testb %bl, %bl +; 64-ALL-NEXT: jne .LBB0_2 +; 64-ALL-NEXT: # %bb.1: # %t +; 64-ALL-NEXT: movl $42, %eax +; 64-ALL-NEXT: popq %rbx +; 64-ALL-NEXT: retq +; 64-ALL-NEXT: .LBB0_2: # %f +; 64-ALL-NEXT: xorl %eax, %eax +; 64-ALL-NEXT: popq %rbx +; 64-ALL-NEXT: retq +entry: %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst %v = extractvalue { i64, i1 } %cx, 0 %p = extractvalue { i64, i1 } %cx, 1 @@ -109,23 +119,62 @@ f: } ; Interesting in producing a clobber without any function calls. -define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) { -; i386-LABEL: test_control_flow: -; i386: cmpxchg -; i386-NEXT: jne - -; i386f-LABEL: test_control_flow: -; i386f: cmpxchg -; i386f-NEXT: jne - -; x8664-LABEL: test_control_flow: -; x8664: cmpxchg -; x8664-NEXT: jne - -; x8664-sahf-LABEL: test_control_flow: -; x8664-sahf: cmpxchg -; x8664-sahf-NEXT: jne - +define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) nounwind { +; 32-ALL-LABEL: test_control_flow: +; 32-ALL: # %bb.0: # %entry +; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %eax +; 32-ALL-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; 32-ALL-NEXT: jle .LBB1_6 +; 32-ALL-NEXT: # %bb.1: # %loop_start +; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 32-ALL-NEXT: .p2align 4, 0x90 +; 32-ALL-NEXT: .LBB1_2: # %while.condthread-pre-split.i +; 32-ALL-NEXT: # =>This Loop Header: Depth=1 +; 32-ALL-NEXT: # Child Loop BB1_3 Depth 2 +; 32-ALL-NEXT: movl (%ecx), %edx +; 32-ALL-NEXT: .p2align 4, 0x90 +; 32-ALL-NEXT: .LBB1_3: # %while.cond.i +; 32-ALL-NEXT: # Parent Loop BB1_2 Depth=1 +; 32-ALL-NEXT: # => This Inner Loop Header: Depth=2 +; 32-ALL-NEXT: movl %edx, %eax +; 32-ALL-NEXT: xorl %edx, %edx +; 32-ALL-NEXT: testl %eax, %eax +; 32-ALL-NEXT: je .LBB1_3 +; 32-ALL-NEXT: # %bb.4: # %while.body.i +; 32-ALL-NEXT: # in Loop: Header=BB1_2 Depth=1 +; 32-ALL-NEXT: lock cmpxchgl %eax, (%ecx) +; 32-ALL-NEXT: jne .LBB1_2 +; 32-ALL-NEXT: # %bb.5: +; 32-ALL-NEXT: xorl %eax, %eax +; 32-ALL-NEXT: .LBB1_6: # %cond.end +; 32-ALL-NEXT: retl +; +; 64-ALL-LABEL: test_control_flow: +; 64-ALL: # %bb.0: # %entry +; 64-ALL-NEXT: cmpl %edx, %esi +; 64-ALL-NEXT: jle .LBB1_5 +; 64-ALL-NEXT: .p2align 4, 0x90 +; 64-ALL-NEXT: .LBB1_1: # %while.condthread-pre-split.i +; 64-ALL-NEXT: # =>This Loop Header: Depth=1 +; 64-ALL-NEXT: # Child Loop BB1_2 Depth 2 +; 64-ALL-NEXT: movl (%rdi), %ecx +; 64-ALL-NEXT: .p2align 4, 0x90 +; 64-ALL-NEXT: .LBB1_2: # %while.cond.i +; 64-ALL-NEXT: # Parent Loop BB1_1 Depth=1 +; 64-ALL-NEXT: # => This Inner Loop Header: Depth=2 +; 64-ALL-NEXT: movl %ecx, %eax +; 64-ALL-NEXT: xorl %ecx, %ecx +; 64-ALL-NEXT: testl %eax, %eax +; 64-ALL-NEXT: je .LBB1_2 +; 64-ALL-NEXT: # %bb.3: # %while.body.i +; 64-ALL-NEXT: # in Loop: Header=BB1_1 Depth=1 +; 64-ALL-NEXT: lock cmpxchgl %eax, (%rdi) +; 64-ALL-NEXT: jne .LBB1_1 +; 64-ALL-NEXT: # %bb.4: +; 64-ALL-NEXT: xorl %esi, %esi +; 64-ALL-NEXT: .LBB1_5: # %cond.end +; 64-ALL-NEXT: movl %esi, %eax +; 64-ALL-NEXT: retq entry: %cmp = icmp sgt i32 %i, %j br i1 %cmp, label %loop_start, label %cond.end @@ -158,52 +207,68 @@ cond.end: ; This one is an interesting case because CMOV doesn't have a chain ; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here. -define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) { -; i386-LABEL: test_feed_cmov: -; i386: cmpxchgl -; i386-NEXT: seto %al -; i386-NEXT: lahf -; i386-NEXT: movl %eax, [[FLAGS:%.*]] -; i386-NEXT: calll foo -; i386-NEXT: pushl %eax -; i386-NEXT: movl [[FLAGS]], %eax -; i386-NEXT: addb $127, %al -; i386-NEXT: sahf -; i386-NEXT: popl %eax - -; i386f-LABEL: test_feed_cmov: -; i386f: cmpxchgl -; i386f-NEXT: seto %al -; i386f-NEXT: lahf -; i386f-NEXT: movl %eax, [[FLAGS:%.*]] -; i386f-NEXT: calll foo -; i386f-NEXT: pushl %eax -; i386f-NEXT: movl [[FLAGS]], %eax -; i386f-NEXT: addb $127, %al -; i386f-NEXT: sahf -; i386f-NEXT: popl %eax - -; x8664-LABEL: test_feed_cmov: -; x8664: cmpxchg -; x8664: pushfq -; x8664-NEXT: popq [[FLAGS:%.*]] -; x8664-NEXT: callq foo -; x8664-NEXT: pushq [[FLAGS]] -; x8664-NEXT: popfq - -; x8664-sahf-LABEL: test_feed_cmov: -; x8664-sahf: cmpxchgl -; RAX is dead, do not push or pop it. -; x8664-sahf-NEXT: seto %al -; x8664-sahf-NEXT: lahf -; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]] -; x8664-sahf-NEXT: callq foo -; x8664-sahf-NEXT: pushq %rax -; x8664-sahf-NEXT: movq [[FLAGS]], %rax -; x8664-sahf-NEXT: addb $127, %al -; x8664-sahf-NEXT: sahf -; x8664-sahf-NEXT: popq %rax - +define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind { +; 32-GOOD-RA-LABEL: test_feed_cmov: +; 32-GOOD-RA: # %bb.0: # %entry +; 32-GOOD-RA-NEXT: pushl %ebx +; 32-GOOD-RA-NEXT: pushl %esi +; 32-GOOD-RA-NEXT: pushl %eax +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi +; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx) +; 32-GOOD-RA-NEXT: sete %bl +; 32-GOOD-RA-NEXT: calll foo +; 32-GOOD-RA-NEXT: testb %bl, %bl +; 32-GOOD-RA-NEXT: jne .LBB2_2 +; 32-GOOD-RA-NEXT: # %bb.1: # %entry +; 32-GOOD-RA-NEXT: movl %eax, %esi +; 32-GOOD-RA-NEXT: .LBB2_2: # %entry +; 32-GOOD-RA-NEXT: movl %esi, %eax +; 32-GOOD-RA-NEXT: addl $4, %esp +; 32-GOOD-RA-NEXT: popl %esi +; 32-GOOD-RA-NEXT: popl %ebx +; 32-GOOD-RA-NEXT: retl +; +; 32-FAST-RA-LABEL: test_feed_cmov: +; 32-FAST-RA: # %bb.0: # %entry +; 32-FAST-RA-NEXT: pushl %ebx +; 32-FAST-RA-NEXT: pushl %esi +; 32-FAST-RA-NEXT: pushl %eax +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi +; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax +; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx) +; 32-FAST-RA-NEXT: sete %bl +; 32-FAST-RA-NEXT: calll foo +; 32-FAST-RA-NEXT: testb %bl, %bl +; 32-FAST-RA-NEXT: jne .LBB2_2 +; 32-FAST-RA-NEXT: # %bb.1: # %entry +; 32-FAST-RA-NEXT: movl %eax, %esi +; 32-FAST-RA-NEXT: .LBB2_2: # %entry +; 32-FAST-RA-NEXT: movl %esi, %eax +; 32-FAST-RA-NEXT: addl $4, %esp +; 32-FAST-RA-NEXT: popl %esi +; 32-FAST-RA-NEXT: popl %ebx +; 32-FAST-RA-NEXT: retl +; +; 64-ALL-LABEL: test_feed_cmov: +; 64-ALL: # %bb.0: # %entry +; 64-ALL-NEXT: pushq %rbp +; 64-ALL-NEXT: pushq %rbx +; 64-ALL-NEXT: pushq %rax +; 64-ALL-NEXT: movl %edx, %ebx +; 64-ALL-NEXT: movl %esi, %eax +; 64-ALL-NEXT: lock cmpxchgl %ebx, (%rdi) +; 64-ALL-NEXT: sete %bpl +; 64-ALL-NEXT: callq foo +; 64-ALL-NEXT: testb %bpl, %bpl +; 64-ALL-NEXT: cmovnel %ebx, %eax +; 64-ALL-NEXT: addq $8, %rsp +; 64-ALL-NEXT: popq %rbx +; 64-ALL-NEXT: popq %rbp +; 64-ALL-NEXT: retq +entry: %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %success = extractvalue { i32, i1 } %res, 1 |