author    Dimitry Andric <dim@FreeBSD.org>  2017-04-16 16:01:22 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2017-04-16 16:01:22 +0000
commit    71d5a2540a98c81f5bcaeb48805e0e2881f530ef (patch)
tree      5343938942df402b49ec7300a1c25a2d4ccd5821 /test/Transforms/CodeGenPrepare
parent    31bbf64f3a4974a2d6c8b3b27ad2f519caf74057 (diff)
Diffstat (limited to 'test/Transforms/CodeGenPrepare')
-rw-r--r--  test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll         |   2
-rw-r--r--  test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll  | 216
-rw-r--r--  test/Transforms/CodeGenPrepare/X86/computedgoto.ll                     | 294
-rw-r--r--  test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll                    |  26
-rw-r--r--  test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll               |   8
-rw-r--r--  test/Transforms/CodeGenPrepare/basic.ll                                |  42
-rw-r--r--  test/Transforms/CodeGenPrepare/builtin-condition.ll                    |  20
-rw-r--r--  test/Transforms/CodeGenPrepare/section.ll                              |  22
8 files changed, 601 insertions, 29 deletions
diff --git a/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll b/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll
index 6cec253bbf9b..2bcb3a9d1e3d 100644
--- a/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll
+++ b/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll
@@ -5,7 +5,7 @@
; ASC-NOT: ptrtoint
; ASC-NOT: inttoptr
-define void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 {
+define amdgpu_kernel void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 {
bb:
%tmp = getelementptr inbounds float, float addrspace(3)* %arg2, i32 16
%tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
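The only change to this test is the calling convention: AMDGPU kernel entry points are now declared with the dedicated amdgpu_kernel convention rather than as plain functions. A minimal sketch of the declaration shape (illustrative only, not part of the patch):

; A kernel entry point uses the amdgpu_kernel calling convention;
; device-side helper functions keep the default convention.
define amdgpu_kernel void @my_kernel(float addrspace(1)* %out) {
  store float 0.0, float addrspace(1)* %out
  ret void
}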
diff --git a/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll b/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll
new file mode 100644
index 000000000000..dfa81b54cc3d
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-special-cases.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; No bypassing should be done in apparently unsuitable cases.
+define void @Test_no_bypassing(i32 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: @Test_no_bypassing(
+; CHECK-NEXT: [[A_1:%.*]] = zext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[A_2:%.*]] = sub i64 -1, [[A_1]]
+; CHECK-NEXT: [[RES:%.*]] = srem i64 [[A_2]], [[B:%.*]]
+; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT: ret void
+;
+ %a.1 = zext i32 %a to i64
+ ; %a.2 is always negative so the division cannot be bypassed.
+ %a.2 = sub i64 -1, %a.1
+ %res = srem i64 %a.2, %b
+ store i64 %res, i64* %retptr
+ ret void
+}
+
+; No OR instruction is needed if one of the operands (here, the divisor) is
+; known to fit into 32 bits.
+define void @Test_check_one_operand(i64 %a, i32 %b, i64* %retptr) {
+; CHECK-LABEL: @Test_check_one_operand(
+; CHECK-NEXT: [[B_1:%.*]] = zext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[A:%.*]], -4294967296
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP8:%.*]]
+; CHECK: [[TMP4:%.*]] = trunc i64 [[B_1]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = udiv i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: br label [[TMP10:%.*]]
+; CHECK: [[TMP9:%.*]] = sdiv i64 [[A]], [[B_1]]
+; CHECK-NEXT: br label [[TMP10]]
+; CHECK: [[TMP11:%.*]] = phi i64 [ [[TMP7]], [[TMP3]] ], [ [[TMP9]], [[TMP8]] ]
+; CHECK-NEXT: store i64 [[TMP11]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT: ret void
+;
+ %b.1 = zext i32 %b to i64
+ %res = sdiv i64 %a, %b.1
+ store i64 %res, i64* %retptr
+ ret void
+}
+
+; If both operands are known to fit into 32 bits, then replace the division
+; in-place without CFG modification.
+define void @Test_check_none(i64 %a, i32 %b, i64* %retptr) {
+; CHECK-LABEL: @Test_check_none(
+; CHECK-NEXT: [[A_1:%.*]] = and i64 [[A:%.*]], 4294967295
+; CHECK-NEXT: [[B_1:%.*]] = zext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A_1]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[B_1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = udiv i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+; CHECK-NEXT: store i64 [[TMP4]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT: ret void
+;
+ %a.1 = and i64 %a, 4294967295
+ %b.1 = zext i32 %b to i64
+ %res = udiv i64 %a.1, %b.1
+ store i64 %res, i64* %retptr
+ ret void
+}
+
+; For an unsigned long division with a short (zero-extended) dividend,
+; the long division is not needed at all.
+define void @Test_special_case(i32 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: @Test_special_case(
+; CHECK-NEXT: [[A_1:%.*]] = zext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i64 [[A_1]], [[B:%.*]]
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP9:%.*]]
+; CHECK: [[TMP3:%.*]] = trunc i64 [[B]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[A_1]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = udiv i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = urem i32 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: br label [[TMP9]]
+; CHECK: [[TMP10:%.*]] = phi i64 [ [[TMP7]], [[TMP2]] ], [ 0, [[TMP0:%.*]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP8]], [[TMP2]] ], [ [[A_1]], [[TMP0]] ]
+; CHECK-NEXT: [[RES:%.*]] = add i64 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT: ret void
+;
+ %a.1 = zext i32 %a to i64
+ %div = udiv i64 %a.1, %b
+ %rem = urem i64 %a.1, %b
+ %res = add i64 %div, %rem
+ store i64 %res, i64* %retptr
+ ret void
+}
+
+
+; Do not bypass a division if one of the operands looks like a hash value.
+define void @Test_dont_bypass_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) {
+; CHECK-LABEL: @Test_dont_bypass_xor(
+; CHECK-NEXT: [[C:%.*]] = xor i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[RES:%.*]] = udiv i64 [[C]], [[L:%.*]]
+; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT: ret void
+;
+ %c = xor i64 %a, %b
+ %res = udiv i64 %c, %l
+ store i64 %res, i64* %retptr
+ ret void
+}
+
+define void @Test_dont_bypass_phi_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) {
+; CHECK-LABEL: @Test_dont_bypass_phi_xor(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[MERGE:%.*]], label [[XORPATH:%.*]]
+; CHECK: xorpath:
+; CHECK-NEXT: [[C:%.*]] = xor i64 [[A:%.*]], [[B]]
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: merge:
+; CHECK-NEXT: [[E:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[C]], [[XORPATH]] ]
+; CHECK-NEXT: [[RES:%.*]] = sdiv i64 [[E]], [[L:%.*]]
+; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %cmp = icmp eq i64 %b, 0
+ br i1 %cmp, label %merge, label %xorpath
+
+xorpath:
+ %c = xor i64 %a, %b
+ br label %merge
+
+merge:
+ %e = phi i64 [ undef, %entry ], [ %c, %xorpath ]
+ %res = sdiv i64 %e, %l
+ store i64 %res, i64* %retptr
+ ret void
+}
+
+define void @Test_dont_bypass_mul_long_const(i64 %a, i64 %l, i64* %retptr) {
+; CHECK-LABEL: @Test_dont_bypass_mul_long_const(
+; CHECK-NEXT: [[C:%.*]] = mul i64 [[A:%.*]], 5229553307
+; CHECK-NEXT: [[RES:%.*]] = urem i64 [[C]], [[L:%.*]]
+; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT: ret void
+;
+  %c = mul i64 %a, 5229553307 ; the constant does not fit into 32 bits
+ %res = urem i64 %c, %l
+ store i64 %res, i64* %retptr
+ ret void
+}
+
+define void @Test_bypass_phi_mul_const(i64 %a, i64 %b, i64* %retptr) {
+; CHECK-LABEL: @Test_bypass_phi_mul_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A_MUL:%.*]] = mul nsw i64 [[A:%.*]], 34806414968801
+; CHECK-NEXT: [[P:%.*]] = icmp sgt i64 [[A]], [[B:%.*]]
+; CHECK-NEXT: br i1 [[P]], label [[BRANCH:%.*]], label [[MERGE:%.*]]
+; CHECK: branch:
+; CHECK-NEXT: br label [[MERGE]]
+; CHECK: merge:
+; CHECK-NEXT: [[LHS:%.*]] = phi i64 [ 42, [[BRANCH]] ], [ [[A_MUL]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[LHS]], [[B]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP8:%.*]]
+; CHECK: [[TMP4:%.*]] = trunc i64 [[B]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[LHS]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = udiv i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: br label [[TMP10:%.*]]
+; CHECK: [[TMP9:%.*]] = sdiv i64 [[LHS]], [[B]]
+; CHECK-NEXT: br label [[TMP10]]
+; CHECK: [[TMP11:%.*]] = phi i64 [ [[TMP7]], [[TMP3]] ], [ [[TMP9]], [[TMP8]] ]
+; CHECK-NEXT: store i64 [[TMP11]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT: ret void
+;
+entry:
+ %a.mul = mul nsw i64 %a, 34806414968801
+ %p = icmp sgt i64 %a, %b
+ br i1 %p, label %branch, label %merge
+
+branch:
+ br label %merge
+
+merge:
+ %lhs = phi i64 [ 42, %branch ], [ %a.mul, %entry ]
+ %res = sdiv i64 %lhs, %b
+ store i64 %res, i64* %retptr
+ ret void
+}
+
+define void @Test_bypass_mul_short_const(i64 %a, i64 %l, i64* %retptr) {
+; CHECK-LABEL: @Test_bypass_mul_short_const(
+; CHECK-NEXT: [[C:%.*]] = mul i64 [[A:%.*]], -42
+; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[C]], [[L:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], -4294967296
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP9:%.*]]
+; CHECK: [[TMP5:%.*]] = trunc i64 [[L]] to i32
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[C]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = urem i32 [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+; CHECK-NEXT: br label [[TMP11:%.*]]
+; CHECK: [[TMP10:%.*]] = urem i64 [[C]], [[L]]
+; CHECK-NEXT: br label [[TMP11]]
+; CHECK: [[TMP12:%.*]] = phi i64 [ [[TMP8]], [[TMP4]] ], [ [[TMP10]], [[TMP9]] ]
+; CHECK-NEXT: store i64 [[TMP12]], i64* [[RETPTR:%.*]]
+; CHECK-NEXT: ret void
+;
+ %c = mul i64 %a, -42
+ %res = urem i64 %c, %l
+ store i64 %res, i64* %retptr
+ ret void
+}
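For reviewers unfamiliar with the pass: these tests exercise the slow-division bypass that CodeGenPrepare performs for NVPTX, which guards a slow 64-bit division with a runtime width check and a fast 32-bit path. A hand-written sketch of the generic expansion that the CHECK lines above describe (names are illustrative; this module is not part of the patch):

; Sketch: what `%res = udiv i64 %a, %b` becomes when neither operand is
; provably narrow. The `or` folds both width checks into one compare.
define i64 @bypass_sketch(i64 %a, i64 %b) {
  %ops = or i64 %a, %b
  %hibits = and i64 %ops, -4294967296   ; any of the top 32 bits set?
  %small = icmp eq i64 %hibits, 0
  br i1 %small, label %short, label %long

short:                                  ; both operands fit in 32 bits
  %a32 = trunc i64 %a to i32
  %b32 = trunc i64 %b to i32
  %q32 = udiv i32 %a32, %b32
  %q.short = zext i32 %q32 to i64
  br label %join

long:                                   ; fall back to the full division
  %q.long = udiv i64 %a, %b
  br label %join

join:
  %q = phi i64 [ %q.short, %short ], [ %q.long, %long ]
  ret i64 %q
}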
diff --git a/test/Transforms/CodeGenPrepare/X86/computedgoto.ll b/test/Transforms/CodeGenPrepare/X86/computedgoto.ll
new file mode 100644
index 000000000000..00a4df9b2c59
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/computedgoto.ll
@@ -0,0 +1,294 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @use(i32) local_unnamed_addr
+declare void @useptr([2 x i8*]*) local_unnamed_addr
+
+; CHECK: @simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16
+@simple.targets = constant [2 x i8*] [i8* blockaddress(@simple, %bb0), i8* blockaddress(@simple, %bb1)], align 16
+
+; CHECK: @multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16
+@multi.targets = constant [2 x i8*] [i8* blockaddress(@multi, %bb0), i8* blockaddress(@multi, %bb1)], align 16
+
+; CHECK: @loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16
+@loop.targets = constant [2 x i8*] [i8* blockaddress(@loop, %bb0), i8* blockaddress(@loop, %bb1)], align 16
+
+; CHECK: @nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16
+@nophi.targets = constant [2 x i8*] [i8* blockaddress(@nophi, %bb0), i8* blockaddress(@nophi, %bb1)], align 16
+
+; CHECK: @noncritical.targets = constant [2 x i8*] [i8* blockaddress(@noncritical, %bb0), i8* blockaddress(@noncritical, %bb1)], align 16
+@noncritical.targets = constant [2 x i8*] [i8* blockaddress(@noncritical, %bb0), i8* blockaddress(@noncritical, %bb1)], align 16
+
+; Check that we break the critical edge when a jump table has only one use.
+define void @simple(i32* nocapture readonly %p) {
+; CHECK-LABEL: @simple(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT: [[INITVAL:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
+; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
+; CHECK-NEXT: i32 0, label [[BB0_CLONE:%.*]]
+; CHECK-NEXT: i32 1, label [[BB1_CLONE:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb0:
+; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
+; CHECK: .split:
+; CHECK-NEXT: [[MERGE:%.*]] = phi i32* [ [[PTR:%.*]], [[BB0:%.*]] ], [ [[INCDEC_PTR]], [[BB0_CLONE]] ]
+; CHECK-NEXT: [[MERGE2:%.*]] = phi i32 [ 0, [[BB0]] ], [ [[INITVAL]], [[BB0_CLONE]] ]
+; CHECK-NEXT: tail call void @use(i32 [[MERGE2]])
+; CHECK-NEXT: br label [[INDIRECTGOTO:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[DOTSPLIT3:%.*]]
+; CHECK: .split3:
+; CHECK-NEXT: [[MERGE5:%.*]] = phi i32* [ [[PTR]], [[BB1:%.*]] ], [ [[INCDEC_PTR]], [[BB1_CLONE]] ]
+; CHECK-NEXT: [[MERGE7:%.*]] = phi i32 [ 1, [[BB1]] ], [ [[INITVAL]], [[BB1_CLONE]] ]
+; CHECK-NEXT: tail call void @use(i32 [[MERGE7]])
+; CHECK-NEXT: br label [[INDIRECTGOTO]]
+; CHECK: indirectgoto:
+; CHECK-NEXT: [[P_ADDR_SINK:%.*]] = phi i32* [ [[MERGE5]], [[DOTSPLIT3]] ], [ [[MERGE]], [[DOTSPLIT]] ]
+; CHECK-NEXT: [[PTR]] = getelementptr inbounds i32, i32* [[P_ADDR_SINK]], i64 1
+; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[P_ADDR_SINK]], align 4
+; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 [[IDX]]
+; CHECK-NEXT: [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+; CHECK-NEXT: indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+; CHECK: bb0.clone:
+; CHECK-NEXT: br label [[DOTSPLIT]]
+; CHECK: bb1.clone:
+; CHECK-NEXT: br label [[DOTSPLIT3]]
+;
+entry:
+ %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %initval = load i32, i32* %p, align 4
+ %initop = load i32, i32* %incdec.ptr, align 4
+ switch i32 %initop, label %exit [
+ i32 0, label %bb0
+ i32 1, label %bb1
+ ]
+
+bb0:
+ %p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ]
+ %opcode.0 = phi i32 [ %initval, %entry ], [ 0, %indirectgoto ]
+ tail call void @use(i32 %opcode.0)
+ br label %indirectgoto
+
+bb1:
+ %p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %ptr, %indirectgoto ]
+ %opcode.1 = phi i32 [ %initval, %entry ], [ 1, %indirectgoto ]
+ tail call void @use(i32 %opcode.1)
+ br label %indirectgoto
+
+indirectgoto:
+ %p.addr.sink = phi i32* [ %p.addr.1, %bb1 ], [ %p.addr.0, %bb0 ]
+ %ptr = getelementptr inbounds i32, i32* %p.addr.sink, i64 1
+ %newp = load i32, i32* %p.addr.sink, align 4
+ %idx = sext i32 %newp to i64
+ %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @simple.targets, i64 0, i64 %idx
+ %newop = load i8*, i8** %arrayidx, align 8
+ indirectbr i8* %newop, [label %bb0, label %bb1]
+
+exit:
+ ret void
+}
+
+; Don't try to break critical edges when several indirectbr instructions point to a single block.
+define void @multi(i32* nocapture readonly %p) {
+; CHECK-LABEL: @multi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT: [[INITVAL:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
+; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
+; CHECK-NEXT: i32 0, label [[BB0:%.*]]
+; CHECK-NEXT: i32 1, label [[BB1:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb0:
+; CHECK-NEXT: [[P_ADDR_0:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY:%.*]] ], [ [[NEXT0:%.*]], [[BB0]] ], [ [[NEXT1:%.*]], [[BB1]] ]
+; CHECK-NEXT: [[OPCODE_0:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ]
+; CHECK-NEXT: tail call void @use(i32 [[OPCODE_0]])
+; CHECK-NEXT: [[NEXT0]] = getelementptr inbounds i32, i32* [[P_ADDR_0]], i64 1
+; CHECK-NEXT: [[NEWP0:%.*]] = load i32, i32* [[P_ADDR_0]], align 4
+; CHECK-NEXT: [[IDX0:%.*]] = sext i32 [[NEWP0]] to i64
+; CHECK-NEXT: [[ARRAYIDX0:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX0]]
+; CHECK-NEXT: [[NEWOP0:%.*]] = load i8*, i8** [[ARRAYIDX0]], align 8
+; CHECK-NEXT: indirectbr i8* [[NEWOP0]], [label [[BB0]], label %bb1]
+; CHECK: bb1:
+; CHECK-NEXT: [[P_ADDR_1:%.*]] = phi i32* [ [[INCDEC_PTR]], [[ENTRY]] ], [ [[NEXT0]], [[BB0]] ], [ [[NEXT1]], [[BB1]] ]
+; CHECK-NEXT: [[OPCODE_1:%.*]] = phi i32 [ [[INITVAL]], [[ENTRY]] ], [ 0, [[BB0]] ], [ 1, [[BB1]] ]
+; CHECK-NEXT: tail call void @use(i32 [[OPCODE_1]])
+; CHECK-NEXT: [[NEXT1]] = getelementptr inbounds i32, i32* [[P_ADDR_1]], i64 1
+; CHECK-NEXT: [[NEWP1:%.*]] = load i32, i32* [[P_ADDR_1]], align 4
+; CHECK-NEXT: [[IDX1:%.*]] = sext i32 [[NEWP1]] to i64
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 [[IDX1]]
+; CHECK-NEXT: [[NEWOP1:%.*]] = load i8*, i8** [[ARRAYIDX1]], align 8
+; CHECK-NEXT: indirectbr i8* [[NEWOP1]], [label [[BB0]], label %bb1]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %initval = load i32, i32* %p, align 4
+ %initop = load i32, i32* %incdec.ptr, align 4
+ switch i32 %initop, label %exit [
+ i32 0, label %bb0
+ i32 1, label %bb1
+ ]
+
+bb0:
+ %p.addr.0 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ]
+ %opcode.0 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ]
+ tail call void @use(i32 %opcode.0)
+ %next0 = getelementptr inbounds i32, i32* %p.addr.0, i64 1
+ %newp0 = load i32, i32* %p.addr.0, align 4
+ %idx0 = sext i32 %newp0 to i64
+ %arrayidx0 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx0
+ %newop0 = load i8*, i8** %arrayidx0, align 8
+ indirectbr i8* %newop0, [label %bb0, label %bb1]
+
+bb1:
+ %p.addr.1 = phi i32* [ %incdec.ptr, %entry ], [ %next0, %bb0 ], [ %next1, %bb1 ]
+ %opcode.1 = phi i32 [ %initval, %entry ], [ 0, %bb0 ], [ 1, %bb1 ]
+ tail call void @use(i32 %opcode.1)
+ %next1 = getelementptr inbounds i32, i32* %p.addr.1, i64 1
+ %newp1 = load i32, i32* %p.addr.1, align 4
+ %idx1 = sext i32 %newp1 to i64
+ %arrayidx1 = getelementptr inbounds [2 x i8*], [2 x i8*]* @multi.targets, i64 0, i64 %idx1
+ %newop1 = load i8*, i8** %arrayidx1, align 8
+ indirectbr i8* %newop1, [label %bb0, label %bb1]
+
+exit:
+ ret void
+}
+
+; Make sure we do the right thing for cases where the indirectbr branches
+; back to the block that contains it.
+define void @loop(i64* nocapture readonly %p) {
+; CHECK-LABEL: @loop(
+; CHECK-NEXT: bb0.clone:
+; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
+; CHECK: bb0:
+; CHECK-NEXT: br label [[DOTSPLIT]]
+; CHECK: .split:
+; CHECK-NEXT: [[MERGE:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[BB0:%.*]] ], [ 0, [[BB0_CLONE:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[MERGE]]
+; CHECK-NEXT: store i64 [[MERGE]], i64* [[TMP0]], align 4
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[MERGE]], 1
+; CHECK-NEXT: [[IDX:%.*]] = srem i64 [[MERGE]], 2
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 [[IDX]]
+; CHECK-NEXT: [[TARGET:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+; CHECK-NEXT: indirectbr i8* [[TARGET]], [label [[BB0]], label %bb1]
+; CHECK: bb1:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %bb0
+
+bb0:
+ %i = phi i64 [ %i.next, %bb0 ], [ 0, %entry ]
+ %tmp0 = getelementptr inbounds i64, i64* %p, i64 %i
+ store i64 %i, i64* %tmp0, align 4
+ %i.next = add nuw nsw i64 %i, 1
+ %idx = srem i64 %i, 2
+ %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @loop.targets, i64 0, i64 %idx
+ %target = load i8*, i8** %arrayidx, align 8
+ indirectbr i8* %target, [label %bb0, label %bb1]
+
+bb1:
+ ret void
+}
+
+; Don't do anything for cases that contain no phis.
+define void @nophi(i32* %p) {
+; CHECK-LABEL: @nophi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT: [[INITOP:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
+; CHECK-NEXT: switch i32 [[INITOP]], label [[EXIT:%.*]] [
+; CHECK-NEXT: i32 0, label [[BB0:%.*]]
+; CHECK-NEXT: i32 1, label [[BB1:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb0:
+; CHECK-NEXT: tail call void @use(i32 0)
+; CHECK-NEXT: br label [[INDIRECTGOTO:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: tail call void @use(i32 1)
+; CHECK-NEXT: br label [[INDIRECTGOTO]]
+; CHECK: indirectgoto:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to i8*
+; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, i8* [[TMP0]], i64 4
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SUNKADDR]] to i32*
+; CHECK-NEXT: [[NEWP:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT: [[IDX:%.*]] = sext i32 [[NEWP]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 [[IDX]]
+; CHECK-NEXT: [[NEWOP:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+; CHECK-NEXT: indirectbr i8* [[NEWOP]], [label [[BB0]], label %bb1]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %incdec.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %initop = load i32, i32* %incdec.ptr, align 4
+ switch i32 %initop, label %exit [
+ i32 0, label %bb0
+ i32 1, label %bb1
+ ]
+
+bb0:
+  tail call void @use(i32 0)
+  br label %indirectgoto
+
+bb1:
+ tail call void @use(i32 1)
+ br label %indirectgoto
+
+indirectgoto:
+ %newp = load i32, i32* %incdec.ptr, align 4
+ %idx = sext i32 %newp to i64
+ %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @nophi.targets, i64 0, i64 %idx
+ %newop = load i8*, i8** %arrayidx, align 8
+ indirectbr i8* %newop, [label %bb0, label %bb1]
+
+exit:
+ ret void
+}
+
+; Don't do anything if the edge isn't critical.
+define i32 @noncritical(i32 %k, i8* %p)
+; CHECK-LABEL: @noncritical(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[D:%.*]] = add i32 [[K:%.*]], 1
+; CHECK-NEXT: indirectbr i8* [[P:%.*]], [label [[BB0:%.*]], label %bb1]
+; CHECK: bb0:
+; CHECK-NEXT: [[R0:%.*]] = sub i32 [[K]], [[D]]
+; CHECK-NEXT: br label [[EXIT:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[R1:%.*]] = sub i32 [[D]], [[K]]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[R0]], [[BB0]] ], [ [[R1]], [[BB1:%.*]] ]
+; CHECK-NEXT: ret i32 0
+;
+{
+entry:
+ %d = add i32 %k, 1
+ indirectbr i8* %p, [label %bb0, label %bb1]
+
+bb0:
+ %v00 = phi i32 [%k, %entry]
+ %v01 = phi i32 [%d, %entry]
+ %r0 = sub i32 %v00, %v01
+ br label %exit
+
+bb1:
+ %v10 = phi i32 [%d, %entry]
+ %v11 = phi i32 [%k, %entry]
+ %r1 = sub i32 %v10, %v11
+ br label %exit
+
+exit:
+ %v = phi i32 [%r0, %bb0], [%r1, %bb1]
+ ret i32 0
+}
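Background for the new computed-goto tests: an edge into an indirectbr successor cannot be split by inserting a new block (the successor's address is taken), so when such a successor has phis, CodeGenPrepare instead duplicates the block per non-indirectbr predecessor (the bb0.clone/bb1.clone blocks checked above) and merges the copies in a .split block. The base pattern under test is the classic computed goto; a minimal standalone sketch (illustrative, not part of the patch):

@tbl = constant [2 x i8*] [i8* blockaddress(@dispatch, %op0),
                           i8* blockaddress(@dispatch, %op1)]

define i32 @dispatch(i64 %idx) {
entry:
  ; Load a block address from the jump table and branch to it.
  %slot = getelementptr inbounds [2 x i8*], [2 x i8*]* @tbl, i64 0, i64 %idx
  %dest = load i8*, i8** %slot, align 8
  indirectbr i8* %dest, [label %op0, label %op1]

op0:
  ret i32 0

op1:
  ret i32 1
}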
diff --git a/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
index 5c0b5f3839d0..9d6e668167fb 100644
--- a/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
+++ b/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
; Can we sink a single addressing mode computation to its use?
define void @test1(i1 %cond, i64* %base) {
; CHECK-LABEL: @test1
-; CHECK: add i64 {{.+}}, 40
+; CHECK: getelementptr i8, {{.+}} 40
entry:
%addr = getelementptr inbounds i64, i64* %base, i64 5
%casted = bitcast i64* %addr to i32*
@@ -33,7 +33,7 @@ entry:
if.then:
; CHECK-LABEL: if.then:
-; CHECK: add i64 {{.+}}, 40
+; CHECK: getelementptr i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
@@ -41,7 +41,7 @@ if.then:
next:
; CHECK-LABEL: next:
-; CHECK: add i64 {{.+}}, 40
+; CHECK: getelementptr i8, {{.+}} 40
%v2 = load i32, i32* %casted, align 4
call void @foo(i32 %v2)
br label %fallthrough
@@ -61,10 +61,10 @@ entry:
if.then:
; CHECK-LABEL: if.then:
-; CHECK: add i64 {{.+}}, 40
+; CHECK: getelementptr i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
-; CHECK-NOT: add i64 {{.+}}, 40
+; CHECK-NOT: getelementptr i8, {{.+}}, 40
%v2 = load i32, i32* %casted, align 4
call void @foo(i32 %v2)
br label %fallthrough
@@ -84,7 +84,7 @@ entry:
if.then:
; CHECK-LABEL: if.then:
-; CHECK: add i64 {{.+}}, 40
+; CHECK: getelementptr i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
@@ -95,7 +95,7 @@ fallthrough:
rare.1:
; CHECK-LABEL: rare.1:
-; CHECK: add i64 {{.+}}, 40
+; CHECK: getelementptr i8, {{.+}} 40
call void @slowpath(i32 %v1, i32* %casted) cold
br label %fallthrough
}
@@ -111,7 +111,7 @@ entry:
if.then:
; CHECK-LABEL: if.then:
-; CHECK-NOT: add i64 {{.+}}, 40
+; CHECK-NOT: getelementptr i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
@@ -136,7 +136,7 @@ entry:
if.then:
; CHECK-LABEL: if.then:
-; CHECK-NOT: add i64 {{.+}}, 40
+; CHECK-NOT: getelementptr i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
@@ -162,7 +162,7 @@ entry:
if.then:
; CHECK-LABEL: if.then:
-; CHECK: add i64 {{.+}}, 40
+; CHECK: getelementptr i8, {{.+}} 40
%v1 = load i32, i32* %casted, align 4
call void @foo(i32 %v1)
%cmp = icmp eq i32 %v1, 0
@@ -170,7 +170,7 @@ if.then:
next:
; CHECK-LABEL: next:
-; CHECK: add i64 {{.+}}, 40
+; CHECK: getelementptr i8, {{.+}} 40
%v2 = load i32, i32* %casted, align 4
call void @foo(i32 %v2)
%cmp2 = icmp eq i32 %v2, 0
@@ -181,13 +181,13 @@ fallthrough:
rare.1:
; CHECK-LABEL: rare.1:
-; CHECK: add i64 {{.+}}, 40
+; CHECK: getelementptr i8, {{.+}} 40
call void @slowpath(i32 %v1, i32* %casted) cold
br label %next
rare.2:
; CHECK-LABEL: rare.2:
-; CHECK: add i64 {{.+}}, 40
+; CHECK: getelementptr i8, {{.+}} 40
call void @slowpath(i32 %v2, i32* %casted) cold
br label %fallthrough
}
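The updated CHECK lines in this file track a change in how CodeGenPrepare rematerializes a sunk addressing mode in the using block: it now emits an i8 getelementptr instead of ptrtoint/add/inttoptr arithmetic, which keeps pointer provenance visible. A sketch of the two forms for the `%base + 40` address used throughout this test (the "old" lines are an assumption reconstructed from the previous CHECK patterns):

; Old sunken form (integer arithmetic):
;   %sunkaddr  = ptrtoint i64* %base to i64
;   %sunkaddr1 = add i64 %sunkaddr, 40
;   %sunkaddr2 = inttoptr i64 %sunkaddr1 to i32*
; New sunken form (GEP-based):
define i32 @sunk_form(i64* %base) {
  %0 = bitcast i64* %base to i8*
  %sunkaddr = getelementptr i8, i8* %0, i64 40
  %1 = bitcast i8* %sunkaddr to i32*
  %v = load i32, i32* %1, align 4
  ret i32 %v
}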
diff --git a/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
index c9f49b5d4f86..31f0ca239e3a 100644
--- a/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
+++ b/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
@@ -1,11 +1,12 @@
-; RUN: opt -S -codegenprepare < %s | FileCheck %s
+; RUN: opt -S -codegenprepare < %s | FileCheck %s -check-prefix=CHECK -check-prefix=GEP
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: @load_cast_gep
-; CHECK: add i64 %sunkaddr, 40
+; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
+; GEP: getelementptr i8, i8 addrspace(1)* [[CAST]], i64 40
define void @load_cast_gep(i1 %cond, i64* %base) {
entry:
%addr = getelementptr inbounds i64, i64* %base, i64 5
@@ -21,7 +22,8 @@ fallthrough:
}
; CHECK-LABEL: @store_gep_cast
-; CHECK: add i64 %sunkaddr, 20
+; GEP: [[CAST:%[0-9]+]] = addrspacecast i64* %base to i8 addrspace(1)*
+; GEP: getelementptr i8, i8 addrspace(1)* [[CAST]], i64 20
define void @store_gep_cast(i1 %cond, i64* %base) {
entry:
%casted = addrspacecast i64* %base to i32 addrspace(1)*
diff --git a/test/Transforms/CodeGenPrepare/basic.ll b/test/Transforms/CodeGenPrepare/basic.ll
index 495d910b5cd6..2e58de7d0934 100644
--- a/test/Transforms/CodeGenPrepare/basic.ll
+++ b/test/Transforms/CodeGenPrepare/basic.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-darwin10.0.0"
; rdar://8785296
define i32 @test1(i8* %ptr) nounwind ssp noredzone align 2 {
entry:
- %0 = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
+ %0 = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false, i1 false)
%1 = icmp ugt i64 %0, 3
br i1 %1, label %T, label %trap
@@ -25,6 +25,44 @@ T:
ret i32 4
}
-declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readonly
+; CHECK-LABEL: @test_objectsize_null_flag(
+define i64 @test_objectsize_null_flag(i8* %ptr) {
+entry:
+ ; CHECK: ret i64 -1
+ %0 = tail call i64 @llvm.objectsize.i64(i8* null, i1 false, i1 true)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test_objectsize_null_flag_min(
+define i64 @test_objectsize_null_flag_min(i8* %ptr) {
+entry:
+ ; CHECK: ret i64 0
+ %0 = tail call i64 @llvm.objectsize.i64(i8* null, i1 true, i1 true)
+ ret i64 %0
+}
+
+; Test foldable null pointers because we evaluate them with non-exact modes in
+; CodeGenPrepare.
+; CHECK-LABEL: @test_objectsize_null_flag_noas0(
+define i64 @test_objectsize_null_flag_noas0() {
+entry:
+ ; CHECK: ret i64 0
+ %0 = tail call i64 @llvm.objectsize.i64.p1i8(i8 addrspace(1)* null, i1 false,
+ i1 true)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test_objectsize_null_flag_min_noas0(
+define i64 @test_objectsize_null_flag_min_noas0() {
+entry:
+ ; CHECK: ret i64 0
+ %0 = tail call i64 @llvm.objectsize.i64.p1i8(i8 addrspace(1)* null, i1 true,
+ i1 true)
+ ret i64 %0
+}
+
+
+declare i64 @llvm.objectsize.i64(i8*, i1, i1) nounwind readonly
+declare i64 @llvm.objectsize.i64.p1i8(i8 addrspace(1)*, i1, i1) nounwind readonly
declare void @llvm.trap() nounwind
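Context for the basic.ll changes: @llvm.objectsize gained a third i1 argument that chooses whether a null pointer is treated as having unknown size. The tests above pin down how CodeGenPrepare folds the intrinsic; a small reference sketch of the behavior they check (the <min>/<nullunknown> parameter names are descriptive, not normative):

declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1)

; Folds checked above, with <nullunknown> = true:
;   null in addrspace(0), <min>=false  ->  -1  (max mode, size unknown)
;   null in addrspace(0), <min>=true   ->   0  (min mode, size unknown)
;   null in addrspace(1), either mode  ->   0  (CGP evaluates non-exactly)
define i64 @objectsize_sketch() {
  %sz = call i64 @llvm.objectsize.i64.p0i8(i8* null, i1 false, i1 true)
  ret i64 %sz   ; folds to ret i64 -1
}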
diff --git a/test/Transforms/CodeGenPrepare/builtin-condition.ll b/test/Transforms/CodeGenPrepare/builtin-condition.ll
index 0d41e9e1eddb..e42529a7b9a1 100644
--- a/test/Transforms/CodeGenPrepare/builtin-condition.ll
+++ b/test/Transforms/CodeGenPrepare/builtin-condition.ll
@@ -74,39 +74,39 @@ entry:
%chararray = alloca [30 x i8], align 16
%chararray2 = alloca [10 x i8], align 1
%0 = getelementptr inbounds [30 x i8], [30 x i8]* %chararray, i64 0, i64 0
- call void @llvm.lifetime.start(i64 30, i8* %0)
+ call void @llvm.lifetime.start.p0i8(i64 30, i8* %0)
%1 = getelementptr inbounds [10 x i8], [10 x i8]* %chararray2, i64 0, i64 0
- call void @llvm.lifetime.start(i64 10, i8* %1)
+ call void @llvm.lifetime.start.p0i8(i64 10, i8* %1)
%tobool = icmp eq i32 %flag, 0
%cptr.0 = select i1 %tobool, i8* %0, i8* %1
%2 = call i64 @llvm.objectsize.i64.p0i8(i8* %cptr.0, i1 true)
- call void @llvm.lifetime.end(i64 10, i8* %1)
- call void @llvm.lifetime.end(i64 30, i8* %0)
+ call void @llvm.lifetime.end.p0i8(i64 10, i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 30, i8* %0)
ret i64 %2
; CHECK-LABEL: foo1
; CHECK: ret i64 10
}
-declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
-declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
define i64 @foo2(i32 %n) {
entry:
%Small = alloca [10 x i8], align 1
%Large = alloca [20 x i8], align 16
%0 = getelementptr inbounds [10 x i8], [10 x i8]* %Small, i64 0, i64 0
- call void @llvm.lifetime.start(i64 10, i8* %0)
+ call void @llvm.lifetime.start.p0i8(i64 10, i8* %0)
%1 = getelementptr inbounds [20 x i8], [20 x i8]* %Large, i64 0, i64 0
- call void @llvm.lifetime.start(i64 20, i8* %1)
+ call void @llvm.lifetime.start.p0i8(i64 20, i8* %1)
%tobool = icmp ne i32 %n, 0
%add.ptr = getelementptr inbounds [20 x i8], [20 x i8]* %Large, i64 0, i64 19
%cond = select i1 %tobool, i8* %0, i8* %add.ptr
%2 = call i64 @llvm.objectsize.i64.p0i8(i8* %cond, i1 false)
- call void @llvm.lifetime.end(i64 20, i8* %1)
- call void @llvm.lifetime.end(i64 10, i8* %0)
+ call void @llvm.lifetime.end.p0i8(i64 20, i8* %1)
+ call void @llvm.lifetime.end.p0i8(i64 10, i8* %0)
ret i64 %2
; CHECK-LABEL: foo2
; CHECK: ret i64 10
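The mechanical renames in this file reflect that llvm.lifetime.start/end are now overloaded on their pointer argument, so every declaration carries a mangled pointer-type suffix (.p0i8 for i8* in address space 0). A sketch of the mangling for a second address space (the addrspace(1) overload is illustrative and not used by this test):

declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.start.p1i8(i64, i8 addrspace(1)* nocapture)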
diff --git a/test/Transforms/CodeGenPrepare/section.ll b/test/Transforms/CodeGenPrepare/section.ll
index 795c45c220db..2c96612e1baf 100644
--- a/test/Transforms/CodeGenPrepare/section.ll
+++ b/test/Transforms/CodeGenPrepare/section.ll
@@ -5,12 +5,32 @@ target triple = "x86_64-pc-linux-gnu"
; This tests that hot/cold functions get the correct section prefix assigned.
; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
+; The entry is hot
define void @hot_func() !prof !15 {
ret void
}
+; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
+; The sum of the two callsite counts is hot
+define void @hot_call_func() !prof !16 {
+ call void @hot_func(), !prof !17
+ call void @hot_func(), !prof !17
+ ret void
+}
+
+; CHECK-NOT: normal_func{{.*}}!section_prefix
+; The sum of all callsite counts is neither hot nor cold
+define void @normal_func() !prof !16 {
+ call void @hot_func(), !prof !17
+ call void @hot_func(), !prof !18
+ call void @hot_func(), !prof !18
+ ret void
+}
+
; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
+; The entry and the callsite are both cold
define void @cold_func() !prof !16 {
+ call void @hot_func(), !prof !18
ret void
}
@@ -33,3 +53,5 @@ define void @cold_func() !prof !16 {
!14 = !{i32 999999, i64 1, i32 2}
!15 = !{!"function_entry_count", i64 1000}
!16 = !{!"function_entry_count", i64 1}
+!17 = !{!"branch_weights", i32 80}
+!18 = !{!"branch_weights", i32 1}
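How the new !prof metadata in section.ll interacts, going by the test's own comments: a function gets the hot section prefix if its entry count or the sum of its callsite counts crosses the profile summary's hot threshold, and the cold prefix if everything stays below the cold threshold. Illustrative arithmetic for the functions above (the thresholds themselves come from the summary metadata elided from this hunk):

; hot_func:       entry count 1000                           -> hot
; hot_call_func:  entry count 1, callsite sum 80 + 80 = 160  -> hot
; normal_func:    entry count 1, callsite sum 80 + 1 + 1     -> neither
; cold_func:      entry count 1, callsite sum 1              -> cold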