Diffstat (limited to 'test/CodeGen/AArch64'): 81 files changed, 8636 insertions, 0 deletions
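The new tests below all follow the llc + FileCheck pattern set by their RUN lines: the IR is compiled for the aarch64-none-linux-gnu triple and the emitted assembly is matched against the CHECK comments. As a minimal sketch of that pattern (hypothetical file and function name, not part of this commit):

; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s

; A trivial add should select the immediate form of ADD.
define i32 @example_add(i32 %a) {
; CHECK: example_add:
  %val = add i32 %a, 1
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1
  ret i32 %val
; CHECK: ret
}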
diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll new file mode 100644 index 000000000000..7cb373232a2c --- /dev/null +++ b/test/CodeGen/AArch64/adc.ll @@ -0,0 +1,54 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i128 @test_simple(i128 %a, i128 %b, i128 %c) { +; CHECK: test_simple: + + %valadd = add i128 %a, %b +; CHECK: adds [[ADDLO:x[0-9]+]], x0, x2 +; CHECK-NEXT: adcs [[ADDHI:x[0-9]+]], x1, x3 + + %valsub = sub i128 %valadd, %c +; CHECK: subs x0, [[ADDLO]], x4 +; CHECK: sbcs x1, [[ADDHI]], x5 + + ret i128 %valsub +; CHECK: ret +} + +define i128 @test_imm(i128 %a) { +; CHECK: test_imm: + + %val = add i128 %a, 12 +; CHECK: adds x0, x0, #12 +; CHECK: adcs x1, x1, {{x[0-9]|xzr}} + + ret i128 %val +; CHECK: ret +} + +define i128 @test_shifted(i128 %a, i128 %b) { +; CHECK: test_shifted: + + %rhs = shl i128 %b, 45 + + %val = add i128 %a, %rhs +; CHECK: adds x0, x0, x2, lsl #45 +; CHECK: adcs x1, x1, {{x[0-9]}} + + ret i128 %val +; CHECK: ret +} + +define i128 @test_extended(i128 %a, i16 %b) { +; CHECK: test_extended: + + %ext = sext i16 %b to i128 + %rhs = shl i128 %ext, 3 + + %val = add i128 %a, %rhs +; CHECK: adds x0, x0, w2, sxth #3 +; CHECK: adcs x1, x1, {{x[0-9]}} + + ret i128 %val +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/addsub-shifted.ll b/test/CodeGen/AArch64/addsub-shifted.ll new file mode 100644 index 000000000000..f2c74f6952b0 --- /dev/null +++ b/test/CodeGen/AArch64/addsub-shifted.ll @@ -0,0 +1,295 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) { +; CHECK: test_lsl_arith: + + %rhs1 = load volatile i32* @var32 + %shift1 = shl i32 %rhs1, 18 + %val1 = add i32 %lhs32, %shift1 + store volatile i32 %val1, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #18 + + %rhs2 = load volatile i32* @var32 + %shift2 = shl i32 %rhs2, 31 + %val2 = add i32 %shift2, %lhs32 + store volatile i32 %val2, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + + %rhs3 = load volatile i32* @var32 + %shift3 = shl i32 %rhs3, 5 + %val3 = sub i32 %lhs32, %shift3 + store volatile i32 %val3, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #5 + +; Subtraction is not commutative! + %rhs4 = load volatile i32* @var32 + %shift4 = shl i32 %rhs4, 19 + %val4 = sub i32 %shift4, %lhs32 + store volatile i32 %val4, i32* @var32 +; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #19 + + %lhs4a = load volatile i32* @var32 + %shift4a = shl i32 %lhs4a, 15 + %val4a = sub i32 0, %shift4a + store volatile i32 %val4a, i32* @var32 +; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsl #15 + + %rhs5 = load volatile i64* @var64 + %shift5 = shl i64 %rhs5, 18 + %val5 = add i64 %lhs64, %shift5 + store volatile i64 %val5, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #18 + + %rhs6 = load volatile i64* @var64 + %shift6 = shl i64 %rhs6, 31 + %val6 = add i64 %shift6, %lhs64 + store volatile i64 %val6, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #31 + + %rhs7 = load volatile i64* @var64 + %shift7 = shl i64 %rhs7, 5 + %val7 = sub i64 %lhs64, %shift7 + store volatile i64 %val7, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #5 + +; Subtraction is not commutative! 
+ %rhs8 = load volatile i64* @var64 + %shift8 = shl i64 %rhs8, 19 + %val8 = sub i64 %shift8, %lhs64 + store volatile i64 %val8, i64* @var64 +; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #19 + + %lhs8a = load volatile i64* @var64 + %shift8a = shl i64 %lhs8a, 60 + %val8a = sub i64 0, %shift8a + store volatile i64 %val8a, i64* @var64 +; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsl #60 + + ret void +; CHECK: ret +} + +define void @test_lsr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) { +; CHECK: test_lsr_arith: + + %shift1 = lshr i32 %rhs32, 18 + %val1 = add i32 %lhs32, %shift1 + store volatile i32 %val1, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #18 + + %shift2 = lshr i32 %rhs32, 31 + %val2 = add i32 %shift2, %lhs32 + store volatile i32 %val2, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #31 + + %shift3 = lshr i32 %rhs32, 5 + %val3 = sub i32 %lhs32, %shift3 + store volatile i32 %val3, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #5 + +; Subtraction is not commutative! + %shift4 = lshr i32 %rhs32, 19 + %val4 = sub i32 %shift4, %lhs32 + store volatile i32 %val4, i32* @var32 +; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #19 + + %shift4a = lshr i32 %lhs32, 15 + %val4a = sub i32 0, %shift4a + store volatile i32 %val4a, i32* @var32 +; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, lsr #15 + + %shift5 = lshr i64 %rhs64, 18 + %val5 = add i64 %lhs64, %shift5 + store volatile i64 %val5, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #18 + + %shift6 = lshr i64 %rhs64, 31 + %val6 = add i64 %shift6, %lhs64 + store volatile i64 %val6, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #31 + + %shift7 = lshr i64 %rhs64, 5 + %val7 = sub i64 %lhs64, %shift7 + store volatile i64 %val7, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #5 + +; Subtraction is not commutative! + %shift8 = lshr i64 %rhs64, 19 + %val8 = sub i64 %shift8, %lhs64 + store volatile i64 %val8, i64* @var64 +; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #19 + + %shift8a = lshr i64 %lhs64, 45 + %val8a = sub i64 0, %shift8a + store volatile i64 %val8a, i64* @var64 +; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, lsr #45 + + ret void +; CHECK: ret +} + +define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) { +; CHECK: test_asr_arith: + + %shift1 = ashr i32 %rhs32, 18 + %val1 = add i32 %lhs32, %shift1 + store volatile i32 %val1, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #18 + + %shift2 = ashr i32 %rhs32, 31 + %val2 = add i32 %shift2, %lhs32 + store volatile i32 %val2, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #31 + + %shift3 = ashr i32 %rhs32, 5 + %val3 = sub i32 %lhs32, %shift3 + store volatile i32 %val3, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #5 + +; Subtraction is not commutative! 
+ %shift4 = ashr i32 %rhs32, 19 + %val4 = sub i32 %shift4, %lhs32 + store volatile i32 %val4, i32* @var32 +; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #19 + + %shift4a = ashr i32 %lhs32, 15 + %val4a = sub i32 0, %shift4a + store volatile i32 %val4a, i32* @var32 +; CHECK: sub {{w[0-9]+}}, wzr, {{w[0-9]+}}, asr #15 + + %shift5 = ashr i64 %rhs64, 18 + %val5 = add i64 %lhs64, %shift5 + store volatile i64 %val5, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #18 + + %shift6 = ashr i64 %rhs64, 31 + %val6 = add i64 %shift6, %lhs64 + store volatile i64 %val6, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #31 + + %shift7 = ashr i64 %rhs64, 5 + %val7 = sub i64 %lhs64, %shift7 + store volatile i64 %val7, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #5 + +; Subtraction is not commutative! + %shift8 = ashr i64 %rhs64, 19 + %val8 = sub i64 %shift8, %lhs64 + store volatile i64 %val8, i64* @var64 +; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #19 + + %shift8a = ashr i64 %lhs64, 45 + %val8a = sub i64 0, %shift8a + store volatile i64 %val8a, i64* @var64 +; CHECK: sub {{x[0-9]+}}, xzr, {{x[0-9]+}}, asr #45 + + ret void +; CHECK: ret +} + +define i32 @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) { +; CHECK: test_cmp: + + %shift1 = shl i32 %rhs32, 13 + %tst1 = icmp uge i32 %lhs32, %shift1 + br i1 %tst1, label %t2, label %end +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsl #13 + +t2: + %shift2 = lshr i32 %rhs32, 20 + %tst2 = icmp ne i32 %lhs32, %shift2 + br i1 %tst2, label %t3, label %end +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsr #20 + +t3: + %shift3 = ashr i32 %rhs32, 9 + %tst3 = icmp ne i32 %lhs32, %shift3 + br i1 %tst3, label %t4, label %end +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, asr #9 + +t4: + %shift4 = shl i64 %rhs64, 43 + %tst4 = icmp uge i64 %lhs64, %shift4 + br i1 %tst4, label %t5, label %end +; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsl #43 + +t5: + %shift5 = lshr i64 %rhs64, 20 + %tst5 = icmp ne i64 %lhs64, %shift5 + br i1 %tst5, label %t6, label %end +; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsr #20 + +t6: + %shift6 = ashr i64 %rhs64, 59 + %tst6 = icmp ne i64 %lhs64, %shift6 + br i1 %tst6, label %t7, label %end +; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, asr #59 + +t7: + ret i32 1 +end: + + ret i32 0 +; CHECK: ret +} + +define i32 @test_cmn(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) { +; CHECK: test_cmn: + + %shift1 = shl i32 %rhs32, 13 + %val1 = sub i32 0, %shift1 + %tst1 = icmp uge i32 %lhs32, %val1 + br i1 %tst1, label %t2, label %end + ; Important that this isn't lowered to a cmn instruction because if %rhs32 == + ; 0 then the results will differ. +; CHECK: sub [[RHS:w[0-9]+]], wzr, {{w[0-9]+}}, lsl #13 +; CHECK: cmp {{w[0-9]+}}, [[RHS]] + +t2: + %shift2 = lshr i32 %rhs32, 20 + %val2 = sub i32 0, %shift2 + %tst2 = icmp ne i32 %lhs32, %val2 + br i1 %tst2, label %t3, label %end +; CHECK: cmn {{w[0-9]+}}, {{w[0-9]+}}, lsr #20 + +t3: + %shift3 = ashr i32 %rhs32, 9 + %val3 = sub i32 0, %shift3 + %tst3 = icmp eq i32 %lhs32, %val3 + br i1 %tst3, label %t4, label %end +; CHECK: cmn {{w[0-9]+}}, {{w[0-9]+}}, asr #9 + +t4: + %shift4 = shl i64 %rhs64, 43 + %val4 = sub i64 0, %shift4 + %tst4 = icmp slt i64 %lhs64, %val4 + br i1 %tst4, label %t5, label %end + ; Again, it's important that cmn isn't used here in case %rhs64 == 0. 
+; CHECK: sub [[RHS:x[0-9]+]], xzr, {{x[0-9]+}}, lsl #43 +; CHECK: cmp {{x[0-9]+}}, [[RHS]] + +t5: + %shift5 = lshr i64 %rhs64, 20 + %val5 = sub i64 0, %shift5 + %tst5 = icmp ne i64 %lhs64, %val5 + br i1 %tst5, label %t6, label %end +; CHECK: cmn {{x[0-9]+}}, {{x[0-9]+}}, lsr #20 + +t6: + %shift6 = ashr i64 %rhs64, 59 + %val6 = sub i64 0, %shift6 + %tst6 = icmp ne i64 %lhs64, %val6 + br i1 %tst6, label %t7, label %end +; CHECK: cmn {{x[0-9]+}}, {{x[0-9]+}}, asr #59 + +t7: + ret i32 1 +end: + + ret i32 0 +; CHECK: ret +} + diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll new file mode 100644 index 000000000000..5148807163c9 --- /dev/null +++ b/test/CodeGen/AArch64/addsub.ll @@ -0,0 +1,127 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +; Note that this should be refactored (for efficiency if nothing else) +; when the PCS is implemented so we don't have to worry about the +; loads and stores. + +@var_i32 = global i32 42 +@var_i64 = global i64 0 + +; Add pure 12-bit immediates: +define void @add_small() { +; CHECK: add_small: + +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #4095 + %val32 = load i32* @var_i32 + %newval32 = add i32 %val32, 4095 + store i32 %newval32, i32* @var_i32 + +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #52 + %val64 = load i64* @var_i64 + %newval64 = add i64 %val64, 52 + store i64 %newval64, i64* @var_i64 + + ret void +} + +; Add 12-bit immediates, shifted left by 12 bits +define void @add_med() { +; CHECK: add_med: + +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12 + %val32 = load i32* @var_i32 + %newval32 = add i32 %val32, 14610432 ; =0xdef000 + store i32 %newval32, i32* @var_i32 + +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12 + %val64 = load i64* @var_i64 + %newval64 = add i64 %val64, 16773120 ; =0xfff000 + store i64 %newval64, i64* @var_i64 + + ret void +} + +; Subtract 12-bit immediates +define void @sub_small() { +; CHECK: sub_small: + +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4095 + %val32 = load i32* @var_i32 + %newval32 = sub i32 %val32, 4095 + store i32 %newval32, i32* @var_i32 + +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #52 + %val64 = load i64* @var_i64 + %newval64 = sub i64 %val64, 52 + store i64 %newval64, i64* @var_i64 + + ret void +} + +; Subtract 12-bit immediates, shifted left by 12 bits +define void @sub_med() { +; CHECK: sub_med: + +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #3567, lsl #12 + %val32 = load i32* @var_i32 + %newval32 = sub i32 %val32, 14610432 ; =0xdef000 + store i32 %newval32, i32* @var_i32 + +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #4095, lsl #12 + %val64 = load i64* @var_i64 + %newval64 = sub i64 %val64, 16773120 ; =0xfff000 + store i64 %newval64, i64* @var_i64 + + ret void +} + +define void @testing() { +; CHECK: testing: + %val = load i32* @var_i32 + +; CHECK: cmp {{w[0-9]+}}, #4095 +; CHECK: b.ne .LBB4_6 + %cmp_pos_small = icmp ne i32 %val, 4095 + br i1 %cmp_pos_small, label %ret, label %test2 + +test2: +; CHECK: cmp {{w[0-9]+}}, #3567, lsl #12 +; CHECK: b.lo .LBB4_6 + %newval2 = add i32 %val, 1 + store i32 %newval2, i32* @var_i32 + %cmp_pos_big = icmp ult i32 %val, 14610432 + br i1 %cmp_pos_big, label %ret, label %test3 + +test3: +; CHECK: cmp {{w[0-9]+}}, #123 +; CHECK: b.lt .LBB4_6 + %newval3 = add i32 %val, 2 + store i32 %newval3, i32* @var_i32 + %cmp_pos_slt = icmp slt i32 %val, 123 + br i1 %cmp_pos_slt, label %ret, label %test4 + +test4: +; CHECK: cmp {{w[0-9]+}}, #321 +; CHECK: b.gt .LBB4_6 + %newval4 = add i32 %val, 3 + store i32 %newval4, i32* @var_i32 + 
%cmp_pos_sgt = icmp sgt i32 %val, 321 + br i1 %cmp_pos_sgt, label %ret, label %test5 + +test5: +; CHECK: cmn {{w[0-9]+}}, #444 +; CHECK: b.gt .LBB4_6 + %newval5 = add i32 %val, 4 + store i32 %newval5, i32* @var_i32 + %cmp_neg_uge = icmp sgt i32 %val, -444 + br i1 %cmp_neg_uge, label %ret, label %test6 + +test6: + %newval6 = add i32 %val, 5 + store i32 %newval6, i32* @var_i32 + ret void + +ret: + ret void +} +; TODO: adds/subs diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll new file mode 100644 index 000000000000..2dd16626ea9f --- /dev/null +++ b/test/CodeGen/AArch64/addsub_ext.ll @@ -0,0 +1,189 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var8 = global i8 0 +@var16 = global i16 0 +@var32 = global i32 0 +@var64 = global i64 0 + +define void @addsub_i8rhs() { +; CHECK: addsub_i8rhs: + %val8_tmp = load i8* @var8 + %lhs32 = load i32* @var32 + %lhs64 = load i64* @var64 + + ; Need this to prevent extension upon load and give a vanilla i8 operand. + %val8 = add i8 %val8_tmp, 123 + + +; Zero-extending to 32-bits + %rhs32_zext = zext i8 %val8 to i32 + %res32_zext = add i32 %lhs32, %rhs32_zext + store volatile i32 %res32_zext, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb + + %rhs32_zext_shift = shl i32 %rhs32_zext, 3 + %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift + store volatile i32 %res32_zext_shift, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3 + + +; Zero-extending to 64-bits + %rhs64_zext = zext i8 %val8 to i64 + %res64_zext = add i64 %lhs64, %rhs64_zext + store volatile i64 %res64_zext, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb + + %rhs64_zext_shift = shl i64 %rhs64_zext, 1 + %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift + store volatile i64 %res64_zext_shift, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1 + +; Sign-extending to 32-bits + %rhs32_sext = sext i8 %val8 to i32 + %res32_sext = add i32 %lhs32, %rhs32_sext + store volatile i32 %res32_sext, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb + + %rhs32_sext_shift = shl i32 %rhs32_sext, 1 + %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift + store volatile i32 %res32_sext_shift, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1 + +; Sign-extending to 64-bits + %rhs64_sext = sext i8 %val8 to i64 + %res64_sext = add i64 %lhs64, %rhs64_sext + store volatile i64 %res64_sext, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb + + %rhs64_sext_shift = shl i64 %rhs64_sext, 4 + %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift + store volatile i64 %res64_sext_shift, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4 + + +; CMP variants + %tst = icmp slt i32 %lhs32, %rhs32_zext + br i1 %tst, label %end, label %test2 +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtb + +test2: + %cmp_sext = sext i8 %val8 to i64 + %tst2 = icmp eq i64 %lhs64, %cmp_sext + br i1 %tst2, label %other, label %end +; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxtb + +other: + store volatile i32 %lhs32, i32* @var32 + ret void + +end: + ret void +} + +define void @addsub_i16rhs() { +; CHECK: addsub_i16rhs: + %val16_tmp = load i16* @var16 + %lhs32 = load i32* @var32 + %lhs64 = load i64* @var64 + + ; Need this to prevent extension upon load and give a vanilla i16 operand. 
+ %val16 = add i16 %val16_tmp, 123 + + +; Zero-extending to 32-bits + %rhs32_zext = zext i16 %val16 to i32 + %res32_zext = add i32 %lhs32, %rhs32_zext + store volatile i32 %res32_zext, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth + + %rhs32_zext_shift = shl i32 %rhs32_zext, 3 + %res32_zext_shift = add i32 %lhs32, %rhs32_zext_shift + store volatile i32 %res32_zext_shift, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3 + + +; Zero-extending to 64-bits + %rhs64_zext = zext i16 %val16 to i64 + %res64_zext = add i64 %lhs64, %rhs64_zext + store volatile i64 %res64_zext, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth + + %rhs64_zext_shift = shl i64 %rhs64_zext, 1 + %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift + store volatile i64 %res64_zext_shift, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1 + +; Sign-extending to 32-bits + %rhs32_sext = sext i16 %val16 to i32 + %res32_sext = add i32 %lhs32, %rhs32_sext + store volatile i32 %res32_sext, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth + + %rhs32_sext_shift = shl i32 %rhs32_sext, 1 + %res32_sext_shift = add i32 %lhs32, %rhs32_sext_shift + store volatile i32 %res32_sext_shift, i32* @var32 +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1 + +; Sign-extending to 64-bits + %rhs64_sext = sext i16 %val16 to i64 + %res64_sext = add i64 %lhs64, %rhs64_sext + store volatile i64 %res64_sext, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth + + %rhs64_sext_shift = shl i64 %rhs64_sext, 4 + %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift + store volatile i64 %res64_sext_shift, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4 + + +; CMP variants + %tst = icmp slt i32 %lhs32, %rhs32_zext + br i1 %tst, label %end, label %test2 +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxth + +test2: + %cmp_sext = sext i16 %val16 to i64 + %tst2 = icmp eq i64 %lhs64, %cmp_sext + br i1 %tst2, label %other, label %end +; CHECK: cmp {{x[0-9]+}}, {{w[0-9]+}}, sxth + +other: + store volatile i32 %lhs32, i32* @var32 + ret void + +end: + ret void +} + +; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for +; example), but the remaining instructions are probably not idiomatic +; in the face of "add/sub (shifted register)" so I don't intend to. +define void @addsub_i32rhs() { +; CHECK: addsub_i32rhs: + %val32_tmp = load i32* @var32 + %lhs64 = load i64* @var64 + + %val32 = add i32 %val32_tmp, 123 + + %rhs64_zext = zext i32 %val32 to i64 + %res64_zext = add i64 %lhs64, %rhs64_zext + store volatile i64 %res64_zext, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw + + %rhs64_zext_shift = shl i64 %rhs64_zext, 2 + %res64_zext_shift = add i64 %lhs64, %rhs64_zext_shift + store volatile i64 %res64_zext_shift, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2 + + %rhs64_sext = sext i32 %val32 to i64 + %res64_sext = add i64 %lhs64, %rhs64_sext + store volatile i64 %res64_sext, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw + + %rhs64_sext_shift = shl i64 %rhs64_sext, 2 + %res64_sext_shift = add i64 %lhs64, %rhs64_sext_shift + store volatile i64 %res64_sext_shift, i64* @var64 +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2 + + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/adrp-relocation.ll b/test/CodeGen/AArch64/adrp-relocation.ll new file mode 100644 index 000000000000..c33b442624a5 --- /dev/null +++ b/test/CodeGen/AArch64/adrp-relocation.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -filetype=obj < %s | elf-dump | FileCheck %s + +define i64 @testfn() nounwind { +entry: + ret i64 0 +} + +define i64 @foo() nounwind { +entry: + %bar = alloca i64 ()*, align 8 + store i64 ()* @testfn, i64 ()** %bar, align 8 + %call = call i64 @testfn() + ret i64 %call +} + +; The above should produce an ADRP/ADD pair to calculate the address of +; testfn. The important point is that LLVM shouldn't think it can deal with the +; relocation on the ADRP itself (even though it knows everything about the +; relative offsets of testfn and foo) because its value depends on where this +; object file's .text section gets relocated in memory. + +; CHECK: .rela.text + +; CHECK: # Relocation 0 +; CHECK-NEXT: (('r_offset', 0x0000000000000010) +; CHECK-NEXT: ('r_sym', 0x00000007) +; CHECK-NEXT: ('r_type', 0x00000113) +; CHECK-NEXT: ('r_addend', 0x0000000000000000) +; CHECK-NEXT: ), +; CHECK-NEXT: Relocation 1 +; CHECK-NEXT: (('r_offset', 0x0000000000000014) +; CHECK-NEXT: ('r_sym', 0x00000007) +; CHECK-NEXT: ('r_type', 0x00000115) +; CHECK-NEXT: ('r_addend', 0x0000000000000000) +; CHECK-NEXT: ), diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll new file mode 100644 index 000000000000..c62edf6503c6 --- /dev/null +++ b/test/CodeGen/AArch64/alloca.ll @@ -0,0 +1,134 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +declare void @use_addr(i8*) + +define void @test_simple_alloca(i64 %n) { +; CHECK: test_simple_alloca: + + %buf = alloca i8, i64 %n + ; Make sure we align the stack change to 16 bytes: +; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15 +; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0 + + ; Make sure we change SP. It would be surprising if anything but x0 were used + ; for the final sp, but it could be if it was then moved into x0. +; CHECK: mov [[TMP:x[0-9]+]], sp +; CHECK: sub x0, [[TMP]], [[SPDELTA]] +; CHECK: mov sp, x0 + + call void @use_addr(i8* %buf) +; CHECK: bl use_addr + + ret void + ; Make sure epilogue restores sp from fp +; CHECK: sub sp, x29, #16 +; CHECK: ldp x29, x30, [sp, #16] +; CHECK: add sp, sp, #32 +; CHECK: ret +} + +declare void @use_addr_loc(i8*, i64*) + +define i64 @test_alloca_with_local(i64 %n) { +; CHECK: test_alloca_with_local: +; CHECK: sub sp, sp, #32 +; CHECK: stp x29, x30, [sp, #16] + + %loc = alloca i64 + %buf = alloca i8, i64 %n + ; Make sure we align the stack change to 16 bytes: +; CHECK: add [[SPDELTA:x[0-9]+]], x0, #15 +; CHECK: and x0, [[SPDELTA]], #0xfffffffffffffff0 + + ; Make sure we change SP. It would be surprising if anything but x0 were used + ; for the final sp, but it could be if it was then moved into x0. 
+; CHECK: mov [[TMP:x[0-9]+]], sp +; CHECK: sub x0, [[TMP]], [[SPDELTA]] +; CHECK: mov sp, x0 + + ; Obviously suboptimal code here, but it to get &local in x1 +; CHECK: sub [[TMP:x[0-9]+]], x29, [[LOC_FROM_FP:#[0-9]+]] +; CHECK: add x1, [[TMP]], #0 + + call void @use_addr_loc(i8* %buf, i64* %loc) +; CHECK: bl use_addr + + %val = load i64* %loc +; CHECK: sub x[[TMP:[0-9]+]], x29, [[LOC_FROM_FP]] +; CHECK: ldr x0, [x[[TMP]]] + + ret i64 %val + ; Make sure epilogue restores sp from fp +; CHECK: sub sp, x29, #16 +; CHECK: ldp x29, x30, [sp, #16] +; CHECK: add sp, sp, #32 +; CHECK: ret +} + +define void @test_variadic_alloca(i64 %n, ...) { +; CHECK: test_variadic_alloca: + +; CHECK: sub sp, sp, #208 +; CHECK: stp x29, x30, [sp, #192] +; CHECK: add x29, sp, #192 +; CHECK: sub [[TMP:x[0-9]+]], x29, #192 +; CHECK: add x8, [[TMP]], #0 +; CHECK: str q7, [x8, #112] +; [...] +; CHECK: str q1, [x8, #16] + + %addr = alloca i8, i64 %n + + call void @use_addr(i8* %addr) +; CHECK: bl use_addr + + ret void +; CHECK: sub sp, x29, #192 +; CHECK: ldp x29, x30, [sp, #192] +; CHECK: add sp, sp, #208 +} + +define void @test_alloca_large_frame(i64 %n) { +; CHECK: test_alloca_large_frame: + +; CHECK: sub sp, sp, #496 +; CHECK: stp x29, x30, [sp, #480] +; CHECK: add x29, sp, #480 +; CHECK: sub sp, sp, #48 +; CHECK: sub sp, sp, #1953, lsl #12 + + %addr1 = alloca i8, i64 %n + %addr2 = alloca i64, i64 1000000 + + call void @use_addr_loc(i8* %addr1, i64* %addr2) + + ret void +; CHECK: sub sp, x29, #480 +; CHECK: ldp x29, x30, [sp, #480] +; CHECK: add sp, sp, #496 +} + +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) + +define void @test_scoped_alloca(i64 %n) { +; CHECK: test_scoped_alloca +; CHECK: sub sp, sp, #32 + + %sp = call i8* @llvm.stacksave() +; CHECK: mov [[SAVED_SP:x[0-9]+]], sp + + %addr = alloca i8, i64 %n +; CHECK: and [[SPDELTA:x[0-9]+]], {{x[0-9]+}}, #0xfffffffffffffff0 +; CHECK: mov [[OLDSP:x[0-9]+]], sp +; CHECK: sub [[NEWSP:x[0-9]+]], [[OLDSP]], [[SPDELTA]] +; CHECK: mov sp, [[NEWSP]] + + call void @use_addr(i8* %addr) +; CHECK: bl use_addr + + call void @llvm.stackrestore(i8* %sp) +; CHECK: mov sp, [[SAVED_SP]] + + ret void +} diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll new file mode 100644 index 000000000000..e10bbb0f8691 --- /dev/null +++ b/test/CodeGen/AArch64/analyze-branch.ll @@ -0,0 +1,231 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +; This test checks that LLVM can do basic stripping and reapplying of branches +; to basic blocks. + +declare void @test_true() +declare void @test_false() + +; !0 corresponds to a branch being taken, !1 to not being takne. 
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4} +!1 = metadata !{metadata !"branch_weights", i32 4, i32 64} + +define void @test_Bcc_fallthrough_taken(i32 %in) nounwind { +; CHECK: test_Bcc_fallthrough_taken: + %tst = icmp eq i32 %in, 42 + br i1 %tst, label %true, label %false, !prof !0 + +; CHECK: cmp {{w[0-9]+}}, #42 + +; CHECK: b.ne [[FALSE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_true + +; CHECK: [[FALSE]]: +; CHECK: bl test_false + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_Bcc_fallthrough_nottaken(i32 %in) nounwind { +; CHECK: test_Bcc_fallthrough_nottaken: + %tst = icmp eq i32 %in, 42 + br i1 %tst, label %true, label %false, !prof !1 + +; CHECK: cmp {{w[0-9]+}}, #42 + +; CHECK: b.eq [[TRUE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_false + +; CHECK: [[TRUE]]: +; CHECK: bl test_true + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_CBZ_fallthrough_taken(i32 %in) nounwind { +; CHECK: test_CBZ_fallthrough_taken: + %tst = icmp eq i32 %in, 0 + br i1 %tst, label %true, label %false, !prof !0 + +; CHECK: cbnz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_true + +; CHECK: [[FALSE]]: +; CHECK: bl test_false + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_CBZ_fallthrough_nottaken(i64 %in) nounwind { +; CHECK: test_CBZ_fallthrough_nottaken: + %tst = icmp eq i64 %in, 0 + br i1 %tst, label %true, label %false, !prof !1 + +; CHECK: cbz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_false + +; CHECK: [[TRUE]]: +; CHECK: bl test_true + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_CBNZ_fallthrough_taken(i32 %in) nounwind { +; CHECK: test_CBNZ_fallthrough_taken: + %tst = icmp ne i32 %in, 0 + br i1 %tst, label %true, label %false, !prof !0 + +; CHECK: cbz {{w[0-9]+}}, [[FALSE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_true + +; CHECK: [[FALSE]]: +; CHECK: bl test_false + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_CBNZ_fallthrough_nottaken(i64 %in) nounwind { +; CHECK: test_CBNZ_fallthrough_nottaken: + %tst = icmp ne i64 %in, 0 + br i1 %tst, label %true, label %false, !prof !1 + +; CHECK: cbnz {{x[0-9]+}}, [[TRUE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_false + +; CHECK: [[TRUE]]: +; CHECK: bl test_true + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_TBZ_fallthrough_taken(i32 %in) nounwind { +; CHECK: test_TBZ_fallthrough_taken: + %bit = and i32 %in, 32768 + %tst = icmp eq i32 %bit, 0 + br i1 %tst, label %true, label %false, !prof !0 + +; CHECK: tbnz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_true + +; CHECK: [[FALSE]]: +; CHECK: bl test_false + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_TBZ_fallthrough_nottaken(i64 %in) nounwind { +; CHECK: test_TBZ_fallthrough_nottaken: + %bit = and i64 %in, 32768 + %tst = icmp eq i64 %bit, 0 + br i1 %tst, label %true, label %false, !prof !1 + +; CHECK: tbz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_false + +; CHECK: 
[[TRUE]]: +; CHECK: bl test_true + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + + +define void @test_TBNZ_fallthrough_taken(i32 %in) nounwind { +; CHECK: test_TBNZ_fallthrough_taken: + %bit = and i32 %in, 32768 + %tst = icmp ne i32 %bit, 0 + br i1 %tst, label %true, label %false, !prof !0 + +; CHECK: tbz {{w[0-9]+}}, #15, [[FALSE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_true + +; CHECK: [[FALSE]]: +; CHECK: bl test_false + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + +define void @test_TBNZ_fallthrough_nottaken(i64 %in) nounwind { +; CHECK: test_TBNZ_fallthrough_nottaken: + %bit = and i64 %in, 32768 + %tst = icmp ne i64 %bit, 0 + br i1 %tst, label %true, label %false, !prof !1 + +; CHECK: tbnz {{x[0-9]+}}, #15, [[TRUE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: // BB# +; CHECK-NEXT: bl test_false + +; CHECK: [[TRUE]]: +; CHECK: bl test_true + +true: + call void @test_true() + ret void + +false: + call void @test_false() + ret void +} + diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll new file mode 100644 index 000000000000..3c03e47147b0 --- /dev/null +++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +define i32 @foo(i32* %var, i1 %cond) { +; CHECK: foo: + br i1 %cond, label %atomic_ver, label %simple_ver +simple_ver: + %oldval = load i32* %var + %newval = add nsw i32 %oldval, -1 + store i32 %newval, i32* %var + br label %somewhere +atomic_ver: + %val = atomicrmw add i32* %var, i32 -1 seq_cst + br label %somewhere +; CHECK: dmb +; CHECK: ldxr +; CHECK: dmb + ; The key point here is that the second dmb isn't immediately followed by the + ; simple_ver basic block, which LLVM attempted to do when DMB had been marked + ; with isBarrier. For now, look for something that looks like "somewhere". +; CHECK-NEXT: mov +somewhere: + %combined = phi i32 [ %val, %atomic_ver ], [ %newval, %simple_ver] + ret i32 %combined +} diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll new file mode 100644 index 000000000000..f3c16171cc83 --- /dev/null +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -0,0 +1,1055 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +@var8 = global i8 0 +@var16 = global i16 0 +@var32 = global i32 0 +@var64 = global i64 0 + +define i8 @test_atomic_load_add_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_add_i8: + %old = atomicrmw add i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_add_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_add_i16: + %old = atomicrmw add i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_add_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_add_i32: + %old = atomicrmw add i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_add_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_add_i64: + %old = atomicrmw add i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_sub_i8: + %old = atomicrmw sub i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_sub_i16: + %old = atomicrmw sub i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_sub_i32: + %old = atomicrmw sub i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_sub_i64: + %old = atomicrmw sub i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_and_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_and_i8: + %old = atomicrmw and i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_and_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_and_i16: + %old = atomicrmw and i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_and_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_and_i32: + %old = atomicrmw and i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_and_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_and_i64: + %old = atomicrmw and i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_or_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_or_i8: + %old = atomicrmw or i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_or_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_or_i16: + %old = atomicrmw or i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_or_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_or_i32: + %old = atomicrmw or i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_or_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_or_i64: + %old = atomicrmw or i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_xor_i8: + %old = atomicrmw xor i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_xor_i16: + %old = atomicrmw xor i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_xor_i32: + %old = atomicrmw xor i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_xor_i64: + %old = atomicrmw xor i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0 +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_xchg_i8: + %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_xchg_i16: + %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_xchg_i32: + %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_xchg_i64: + %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + + +define i8 @test_atomic_load_min_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_min_i8: + %old = atomicrmw min i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]], sxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_min_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_min_i16: + %old = atomicrmw min i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: cmp w0, w[[OLD]], sxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_min_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_min_i32: + %old = atomicrmw min i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]] +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_min_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_min_i64: + %old = atomicrmw min i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp x0, x[[OLD]] +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_max_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_max_i8: + %old = atomicrmw max i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]], sxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_max_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_max_i16: + %old = atomicrmw max i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: cmp w0, w[[OLD]], sxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_max_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_max_i32: + %old = atomicrmw max i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]] +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_max_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_max_i64: + %old = atomicrmw max i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp x0, x[[OLD]] +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_umin_i8: + %old = atomicrmw umin i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]], uxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_umin_i16: + %old = atomicrmw umin i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: cmp w0, w[[OLD]], uxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_umin_i32: + %old = atomicrmw umin i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]] +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_umin_i64: + %old = atomicrmw umin i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp x0, x[[OLD]] +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind { +; CHECK: test_atomic_load_umax_i8: + %old = atomicrmw umax i8* @var8, i8 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]], uxtb +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo +; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind { +; CHECK: test_atomic_load_umax_i16: + %old = atomicrmw umax i16* @var16, i16 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. 
+; CHECK-NEXT: cmp w0, w[[OLD]], uxth +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo +; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind { +; CHECK: test_atomic_load_umax_i32: + %old = atomicrmw umax i32* @var32, i32 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w0, w[[OLD]] +; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind { +; CHECK: test_atomic_load_umax_i64: + %old = atomicrmw umax i64* @var64, i64 %offset seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; x0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp x0, x[[OLD]] +; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo +; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1 +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { +; CHECK: test_atomic_cmpxchg_i8: + %old = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] + ; As above, w1 is a reasonable guess. +; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i8 %old +} + +define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { +; CHECK: test_atomic_cmpxchg_i16: + %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16 + +; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] + ; As above, w1 is a reasonable guess. 
+; CHECK: stxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i16 %old +} + +define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { +; CHECK: test_atomic_cmpxchg_i32: + %old = cmpxchg i32* @var32, i32 %wanted, i32 %new seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32 + +; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] + ; As above, w1 is a reasonable guess. +; CHECK: stxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i32 %old +} + +define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { +; CHECK: test_atomic_cmpxchg_i64: + %old = cmpxchg i64* @var64, i64 %wanted, i64 %new seq_cst +; CHECK: dmb ish +; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64 + +; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]] + ; w0 below is a reasonable guess but could change: it certainly comes into the + ; function there. +; CHECK-NEXT: cmp x[[OLD]], x0 +; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] + ; As above, w1 is a reasonable guess. +; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: dmb ish + +; CHECK: mov x0, x[[OLD]] + ret i64 %old +} + +define i8 @test_atomic_load_monotonic_i8() nounwind { +; CHECK: test_atomic_load_monotonic_i8: + %val = load atomic i8* @var8 monotonic, align 1 +; CHECK-NOT: dmb +; CHECK: adrp x[[HIADDR:[0-9]+]], var8 +; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8] +; CHECK-NOT: dmb + + ret i8 %val +} + +define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind { +; CHECK: test_atomic_load_monotonic_regoff_i8: + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i8* + + %val = load atomic i8* %addr monotonic, align 1 +; CHECK-NOT: dmb +; CHECK: ldrb w0, [x0, x1] +; CHECK-NOT: dmb + + ret i8 %val +} + +define i8 @test_atomic_load_acquire_i8() nounwind { +; CHECK: test_atomic_load_acquire_i8: + %val = load atomic i8* @var8 acquire, align 1 +; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8 + +; CHECK: ldarb w0, [x[[ADDR]]] + ret i8 %val +} + +define i8 @test_atomic_load_seq_cst_i8() nounwind { +; CHECK: test_atomic_load_seq_cst_i8: + %val = load atomic i8* @var8 seq_cst, align 1 +; CHECK: adrp x[[HIADDR:[0-9]+]], var8 +; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8] +; CHECK: dmb ish + ret i8 %val +} + +define i16 @test_atomic_load_monotonic_i16() nounwind { +; CHECK: test_atomic_load_monotonic_i16: + %val = load atomic i16* @var16 monotonic, align 2 +; CHECK-NOT: dmb +; CHECK: adrp x[[HIADDR:[0-9]+]], var16 +; CHECK: ldrh w0, [x[[HIADDR]], #:lo12:var16] +; CHECK-NOT: dmb + + ret i16 %val +} + +define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind { +; CHECK: test_atomic_load_monotonic_regoff_i32: + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i32* + + %val = load atomic i32* %addr monotonic, align 4 +; CHECK-NOT: dmb +; CHECK: ldr w0, [x0, x1] +; CHECK-NOT: dmb + + ret i32 %val +} + +define i64 
@test_atomic_load_seq_cst_i64() nounwind { +; CHECK: test_atomic_load_seq_cst_i64: + %val = load atomic i64* @var64 seq_cst, align 8 +; CHECK: adrp x[[HIADDR:[0-9]+]], var64 +; CHECK: ldr x0, [x[[HIADDR]], #:lo12:var64] +; CHECK: dmb ish + ret i64 %val +} + +define void @test_atomic_store_monotonic_i8(i8 %val) nounwind { +; CHECK: test_atomic_store_monotonic_i8: + store atomic i8 %val, i8* @var8 monotonic, align 1 +; CHECK: adrp x[[HIADDR:[0-9]+]], var8 +; CHECK: strb w0, [x[[HIADDR]], #:lo12:var8] + + ret void +} + +define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val) nounwind { +; CHECK: test_atomic_store_monotonic_regoff_i8: + + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i8* + + store atomic i8 %val, i8* %addr monotonic, align 1 +; CHECK: strb w2, [x0, x1] + + ret void +} +define void @test_atomic_store_release_i8(i8 %val) nounwind { +; CHECK: test_atomic_store_release_i8: + store atomic i8 %val, i8* @var8 release, align 1 +; CHECK: adrp [[HIADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8 +; CHECK: stlrb w0, [x[[ADDR]]] + + ret void +} + +define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind { +; CHECK: test_atomic_store_seq_cst_i8: + store atomic i8 %val, i8* @var8 seq_cst, align 1 +; CHECK: adrp [[HIADDR:x[0-9]+]], var8 +; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8 +; CHECK: stlrb w0, [x[[ADDR]]] +; CHECK: dmb ish + + ret void +} + +define void @test_atomic_store_monotonic_i16(i16 %val) nounwind { +; CHECK: test_atomic_store_monotonic_i16: + store atomic i16 %val, i16* @var16 monotonic, align 2 +; CHECK: adrp x[[HIADDR:[0-9]+]], var16 +; CHECK: strh w0, [x[[HIADDR]], #:lo12:var16] + + ret void +} + +define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %val) nounwind { +; CHECK: test_atomic_store_monotonic_regoff_i32: + + %addr_int = add i64 %base, %off + %addr = inttoptr i64 %addr_int to i32* + + store atomic i32 %val, i32* %addr monotonic, align 4 +; CHECK: str w2, [x0, x1] + + ret void +} + +define void @test_atomic_store_release_i64(i64 %val) nounwind { +; CHECK: test_atomic_store_release_i64: + store atomic i64 %val, i64* @var64 release, align 8 +; CHECK: adrp [[HIADDR:x[0-9]+]], var64 +; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64 +; CHECK: stlr x0, [x[[ADDR]]] + + ret void +} diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll new file mode 100644 index 000000000000..da94041c95ff --- /dev/null +++ b/test/CodeGen/AArch64/basic-pic.ll @@ -0,0 +1,70 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -relocation-model=pic -filetype=obj %s -o -| llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s + +@var = global i32 0 + +; CHECK-ELF: RELOCATION RECORDS FOR [.text] + +define i32 @get_globalvar() { +; CHECK: get_globalvar: + + %val = load i32* @var +; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var +; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], #:got_lo12:var] +; CHECK: ldr w0, [x[[GOTLOC]]] + +; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var +; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var + ret i32 %val +} + +define i32* @get_globalvaraddr() { +; CHECK: get_globalvaraddr: + + %val = load i32* @var +; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var +; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:var] + +; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var +; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var + ret i32* @var +} + +@hiddenvar = 
hidden global i32 0 + +define i32 @get_hiddenvar() { +; CHECK: get_hiddenvar: + + %val = load i32* @hiddenvar +; CHECK: adrp x[[HI:[0-9]+]], hiddenvar +; CHECK: ldr w0, [x[[HI]], #:lo12:hiddenvar] + +; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar +; CHECK-ELF: R_AARCH64_LDST32_ABS_LO12_NC hiddenvar + ret i32 %val +} + +define i32* @get_hiddenvaraddr() { +; CHECK: get_hiddenvaraddr: + + %val = load i32* @hiddenvar +; CHECK: adrp [[HI:x[0-9]+]], hiddenvar +; CHECK: add x0, [[HI]], #:lo12:hiddenvar + +; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 hiddenvar +; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC hiddenvar + ret i32* @hiddenvar +} + +define void()* @get_func() { +; CHECK: get_func: + + ret void()* bitcast(void()*()* @get_func to void()*) +; CHECK: adrp x[[GOTHI:[0-9]+]], :got:get_func +; CHECK: ldr x0, [x[[GOTHI]], #:got_lo12:get_func] + + ; Particularly important that the ADRP gets a relocation, LLVM tends to think + ; it can relax it because it knows where get_func is. It can't! +; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE get_func +; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC get_func +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/bitfield-insert-0.ll b/test/CodeGen/AArch64/bitfield-insert-0.ll new file mode 100644 index 000000000000..d1191f6aaa8a --- /dev/null +++ b/test/CodeGen/AArch64/bitfield-insert-0.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -disassemble - | FileCheck %s + +; The encoding of lsb -> immr in the CGed bitfield instructions was wrong at one +; point, in the edge case where lsb = 0. Just make sure. + +define void @test_bfi0(i32* %existing, i32* %new) { +; CHECK: bfxil {{w[0-9]+}}, {{w[0-9]+}}, #0, #18 + + %oldval = load volatile i32* %existing + %oldval_keep = and i32 %oldval, 4294705152 ; 0xfffc_0000 + + %newval = load volatile i32* %new + %newval_masked = and i32 %newval, 262143 ; = 0x0003_ffff + + %combined = or i32 %newval_masked, %oldval_keep + store volatile i32 %combined, i32* %existing + + ret void +}
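For reference, BFXIL Wd, Wn, #lsb, #width copies bits [lsb, lsb+width) of Wn into the low width bits of Wd and leaves the remaining bits of Wd untouched, so the lsb = 0 case checked above reduces to a mask-and-merge. A worked example with made-up input values (not taken from the test):

;   wd = 0xFFFC0000   (bits kept from %existing, mask 0xfffc0000)
;   wn = 0x0002ABCD   (bits taken from %new, mask 0x0003ffff)
;   bfxil wd, wn, #0, #18
;   => wd = (wd & ~0x3FFFF) | (wn & 0x3FFFF) = 0xFFFEABCD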
\ No newline at end of file diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll new file mode 100644 index 000000000000..3e871b9a6d27 --- /dev/null +++ b/test/CodeGen/AArch64/bitfield-insert.ll @@ -0,0 +1,193 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +; First, a simple example from Clang. The registers could plausibly be +; different, but probably won't be. + +%struct.foo = type { i8, [2 x i8], i8 } + +define [1 x i64] @from_clang([1 x i64] %f.coerce, i32 %n) nounwind readnone { +; CHECK: from_clang: +; CHECK: bfi w0, w1, #3, #4 +; CHECK-NEXT: ret + +entry: + %f.coerce.fca.0.extract = extractvalue [1 x i64] %f.coerce, 0 + %tmp.sroa.0.0.extract.trunc = trunc i64 %f.coerce.fca.0.extract to i32 + %bf.value = shl i32 %n, 3 + %0 = and i32 %bf.value, 120 + %f.sroa.0.0.insert.ext.masked = and i32 %tmp.sroa.0.0.extract.trunc, 135 + %1 = or i32 %f.sroa.0.0.insert.ext.masked, %0 + %f.sroa.0.0.extract.trunc = zext i32 %1 to i64 + %tmp1.sroa.1.1.insert.insert = and i64 %f.coerce.fca.0.extract, 4294967040 + %tmp1.sroa.0.0.insert.insert = or i64 %f.sroa.0.0.extract.trunc, %tmp1.sroa.1.1.insert.insert + %.fca.0.insert = insertvalue [1 x i64] undef, i64 %tmp1.sroa.0.0.insert.insert, 0 + ret [1 x i64] %.fca.0.insert +} + +define void @test_whole32(i32* %existing, i32* %new) { +; CHECK: test_whole32: +; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #5 + + %oldval = load volatile i32* %existing + %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff + + %newval = load volatile i32* %new + %newval_shifted = shl i32 %newval, 26 + %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000 + + %combined = or i32 %oldval_keep, %newval_masked + store volatile i32 %combined, i32* %existing + + ret void +} + +define void @test_whole64(i64* %existing, i64* %new) { +; CHECK: test_whole64: +; CHECK: bfi {{x[0-9]+}}, {{x[0-9]+}}, #26, #14 +; CHECK-NOT: and +; CHECK: ret + + %oldval = load volatile i64* %existing + %oldval_keep = and i64 %oldval, 18446742974265032703 ; = 0xffffff0003ffffffL + + %newval = load volatile i64* %new + %newval_shifted = shl i64 %newval, 26 + %newval_masked = and i64 %newval_shifted, 1099444518912 ; = 0xfffc000000 + + %combined = or i64 %oldval_keep, %newval_masked + store volatile i64 %combined, i64* %existing + + ret void +} + +define void @test_whole32_from64(i64* %existing, i64* %new) { +; CHECK: test_whole32_from64: +; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #{{0|16}}, #16 +; CHECK-NOT: and +; CHECK: ret + + %oldval = load volatile i64* %existing + %oldval_keep = and i64 %oldval, 4294901760 ; = 0xffff0000 + + %newval = load volatile i64* %new + %newval_masked = and i64 %newval, 65535 ; = 0xffff + + %combined = or i64 %oldval_keep, %newval_masked + store volatile i64 %combined, i64* %existing + + ret void +} + +define void @test_32bit_masked(i32 *%existing, i32 *%new) { +; CHECK: test_32bit_masked: +; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4 +; CHECK: and {{w[0-9]+}}, [[INSERT]], #0xff + + %oldval = load volatile i32* %existing + %oldval_keep = and i32 %oldval, 135 ; = 0x87 + + %newval = load volatile i32* %new + %newval_shifted = shl i32 %newval, 3 + %newval_masked = and i32 %newval_shifted, 120 ; = 0x78 + + %combined = or i32 %oldval_keep, %newval_masked + store volatile i32 %combined, i32* %existing + + ret void +} + +define void @test_64bit_masked(i64 *%existing, i64 *%new) { +; CHECK: test_64bit_masked: +; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8 +; CHECK: and {{x[0-9]+}}, [[INSERT]], 
#0xffff00000000 + + %oldval = load volatile i64* %existing + %oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000 + + %newval = load volatile i64* %new + %newval_shifted = shl i64 %newval, 40 + %newval_masked = and i64 %newval_shifted, 280375465082880 ; = 0xff00_0000_0000 + + %combined = or i64 %newval_masked, %oldval_keep + store volatile i64 %combined, i64* %existing + + ret void +} + +; Mask is too complicated for literal ANDwwi, make sure other avenues are tried. +define void @test_32bit_complexmask(i32 *%existing, i32 *%new) { +; CHECK: test_32bit_complexmask: +; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + + %oldval = load volatile i32* %existing + %oldval_keep = and i32 %oldval, 647 ; = 0x287 + + %newval = load volatile i32* %new + %newval_shifted = shl i32 %newval, 3 + %newval_masked = and i32 %newval_shifted, 120 ; = 0x278 + + %combined = or i32 %oldval_keep, %newval_masked + store volatile i32 %combined, i32* %existing + + ret void +} + +; Neither mask is is a contiguous set of 1s. BFI can't be used +define void @test_32bit_badmask(i32 *%existing, i32 *%new) { +; CHECK: test_32bit_badmask: +; CHECK-NOT: bfi +; CHECK: ret + + %oldval = load volatile i32* %existing + %oldval_keep = and i32 %oldval, 135 ; = 0x87 + + %newval = load volatile i32* %new + %newval_shifted = shl i32 %newval, 3 + %newval_masked = and i32 %newval_shifted, 632 ; = 0x278 + + %combined = or i32 %oldval_keep, %newval_masked + store volatile i32 %combined, i32* %existing + + ret void +} + +; Ditto +define void @test_64bit_badmask(i64 *%existing, i64 *%new) { +; CHECK: test_64bit_badmask: +; CHECK-NOT: bfi +; CHECK: ret + + %oldval = load volatile i64* %existing + %oldval_keep = and i64 %oldval, 135 ; = 0x87 + + %newval = load volatile i64* %new + %newval_shifted = shl i64 %newval, 3 + %newval_masked = and i64 %newval_shifted, 664 ; = 0x278 + + %combined = or i64 %oldval_keep, %newval_masked + store volatile i64 %combined, i64* %existing + + ret void +} + +; Bitfield insert where there's a left-over shr needed at the beginning +; (e.g. result of str.bf1 = str.bf2) +define void @test_32bit_with_shr(i32* %existing, i32* %new) { +; CHECK: test_32bit_with_shr: + + %oldval = load volatile i32* %existing + %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff + + %newval = load i32* %new + %newval_shifted = shl i32 %newval, 12 + %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000 + + %combined = or i32 %oldval_keep, %newval_masked + store volatile i32 %combined, i32* %existing +; CHECK: lsr [[BIT:w[0-9]+]], {{w[0-9]+}}, #14 +; CHECK: bfi {{w[0-9]}}, [[BIT]], #26, #5 + + ret void +} + diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll new file mode 100644 index 000000000000..36d337ef05ef --- /dev/null +++ b/test/CodeGen/AArch64/bitfield.ll @@ -0,0 +1,218 @@ + +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_extendb(i8 %var) { +; CHECK: test_extendb: + + %sxt32 = sext i8 %var to i32 + store volatile i32 %sxt32, i32* @var32 +; CHECK: sxtb {{w[0-9]+}}, {{w[0-9]+}} + + %sxt64 = sext i8 %var to i64 + store volatile i64 %sxt64, i64* @var64 +; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}} + +; N.b. this doesn't actually produce a bitfield instruction at the +; moment, but it's still a good test to have and the semantics are +; correct. 
+ %uxt32 = zext i8 %var to i32 + store volatile i32 %uxt32, i32* @var32 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff + + %uxt64 = zext i8 %var to i64 + store volatile i64 %uxt64, i64* @var64 +; CHECK: uxtb {{x[0-9]+}}, {{w[0-9]+}} + ret void +} + +define void @test_extendh(i16 %var) { +; CHECK: test_extendh: + + %sxt32 = sext i16 %var to i32 + store volatile i32 %sxt32, i32* @var32 +; CHECK: sxth {{w[0-9]+}}, {{w[0-9]+}} + + %sxt64 = sext i16 %var to i64 + store volatile i64 %sxt64, i64* @var64 +; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}} + +; N.b. this doesn't actually produce a bitfield instruction at the +; moment, but it's still a good test to have and the semantics are +; correct. + %uxt32 = zext i16 %var to i32 + store volatile i32 %uxt32, i32* @var32 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff + + %uxt64 = zext i16 %var to i64 + store volatile i64 %uxt64, i64* @var64 +; CHECK: uxth {{x[0-9]+}}, {{w[0-9]+}} + ret void +} + +define void @test_extendw(i32 %var) { +; CHECK: test_extendw: + + %sxt64 = sext i32 %var to i64 + store volatile i64 %sxt64, i64* @var64 +; CHECK: sxtw {{x[0-9]+}}, {{w[0-9]+}} + + %uxt64 = zext i32 %var to i64 + store volatile i64 %uxt64, i64* @var64 +; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #0, #32 + ret void +} + +define void @test_shifts(i32 %val32, i64 %val64) { +; CHECK: test_shifts: + + %shift1 = ashr i32 %val32, 31 + store volatile i32 %shift1, i32* @var32 +; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, #31 + + %shift2 = lshr i32 %val32, 8 + store volatile i32 %shift2, i32* @var32 +; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, #8 + + %shift3 = shl i32 %val32, 1 + store volatile i32 %shift3, i32* @var32 +; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, #1 + + %shift4 = ashr i64 %val64, 31 + store volatile i64 %shift4, i64* @var64 +; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, #31 + + %shift5 = lshr i64 %val64, 8 + store volatile i64 %shift5, i64* @var64 +; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, #8 + + %shift6 = shl i64 %val64, 63 + store volatile i64 %shift6, i64* @var64 +; CHECK: lsl {{x[0-9]+}}, {{x[0-9]+}}, #63 + + %shift7 = ashr i64 %val64, 63 + store volatile i64 %shift7, i64* @var64 +; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, #63 + + %shift8 = lshr i64 %val64, 63 + store volatile i64 %shift8, i64* @var64 +; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, #63 + + %shift9 = lshr i32 %val32, 31 + store volatile i32 %shift9, i32* @var32 +; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, #31 + + %shift10 = shl i32 %val32, 31 + store volatile i32 %shift10, i32* @var32 +; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, #31 + + ret void +} + +; LLVM can produce in-register extensions taking place entirely with +; 64-bit registers too. +define void @test_sext_inreg_64(i64 %in) { +; CHECK: test_sext_inreg_64: + +; i1 doesn't have an official alias, but crops up and is handled by +; the bitfield ops. 
+ %trunc_i1 = trunc i64 %in to i1 + %sext_i1 = sext i1 %trunc_i1 to i64 + store volatile i64 %sext_i1, i64* @var64 +; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1 + + %trunc_i8 = trunc i64 %in to i8 + %sext_i8 = sext i8 %trunc_i8 to i64 + store volatile i64 %sext_i8, i64* @var64 +; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}} + + %trunc_i16 = trunc i64 %in to i16 + %sext_i16 = sext i16 %trunc_i16 to i64 + store volatile i64 %sext_i16, i64* @var64 +; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}} + + %trunc_i32 = trunc i64 %in to i32 + %sext_i32 = sext i32 %trunc_i32 to i64 + store volatile i64 %sext_i32, i64* @var64 +; CHECK: sxtw {{x[0-9]+}}, {{w[0-9]+}} + ret void +} + +; These instructions don't actually select to official bitfield +; operations, but it's important that we select them somehow: +define void @test_zext_inreg_64(i64 %in) { +; CHECK: test_zext_inreg_64: + + %trunc_i8 = trunc i64 %in to i8 + %zext_i8 = zext i8 %trunc_i8 to i64 + store volatile i64 %zext_i8, i64* @var64 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff + + %trunc_i16 = trunc i64 %in to i16 + %zext_i16 = zext i16 %trunc_i16 to i64 + store volatile i64 %zext_i16, i64* @var64 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff + + %trunc_i32 = trunc i64 %in to i32 + %zext_i32 = zext i32 %trunc_i32 to i64 + store volatile i64 %zext_i32, i64* @var64 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffffffff + + ret void +} + +define i64 @test_sext_inreg_from_32(i32 %in) { +; CHECK: test_sext_inreg_from_32: + + %small = trunc i32 %in to i1 + %ext = sext i1 %small to i64 + + ; Different registers are of course, possible, though suboptimal. This is + ; making sure that a 64-bit "(sext_inreg (anyext GPR32), i1)" uses the 64-bit + ; sbfx rather than just 32-bits. +; CHECK: sbfx x0, x0, #0, #1 + ret i64 %ext +} + + +define i32 @test_ubfx32(i32* %addr) { +; CHECK: test_ubfx32: +; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #23, #3 + + %fields = load i32* %addr + %shifted = lshr i32 %fields, 23 + %masked = and i32 %shifted, 7 + ret i32 %masked +} + +define i64 @test_ubfx64(i64* %addr) { +; CHECK: test_ubfx64: +; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #25, #10 + + %fields = load i64* %addr + %shifted = lshr i64 %fields, 25 + %masked = and i64 %shifted, 1023 + ret i64 %masked +} + +define i32 @test_sbfx32(i32* %addr) { +; CHECK: test_sbfx32: +; CHECK: sbfx {{w[0-9]+}}, {{w[0-9]+}}, #6, #3 + + %fields = load i32* %addr + %shifted = shl i32 %fields, 23 + %extended = ashr i32 %shifted, 29 + ret i32 %extended +} + +define i64 @test_sbfx64(i64* %addr) { +; CHECK: test_sbfx64: +; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #63 + + %fields = load i64* %addr + %shifted = shl i64 %fields, 1 + %extended = ashr i64 %shifted, 1 + ret i64 %extended +} diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll new file mode 100644 index 000000000000..3d0a5cf96bcd --- /dev/null +++ b/test/CodeGen/AArch64/blockaddress.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +@addr = global i8* null + +define void @test_blockaddress() { +; CHECK: test_blockaddress: + store volatile i8* blockaddress(@test_blockaddress, %block), i8** @addr + %val = load volatile i8** @addr + indirectbr i8* %val, [label %block] +; CHECK: adrp [[DEST_HI:x[0-9]+]], [[DEST_LBL:.Ltmp[0-9]+]] +; CHECK: add [[DEST:x[0-9]+]], [[DEST_HI]], #:lo12:[[DEST_LBL]] +; CHECK: str [[DEST]], +; CHECK: ldr [[NEWDEST:x[0-9]+]] +; CHECK: br [[NEWDEST]] + +block: + ret void +} diff --git a/test/CodeGen/AArch64/bool-loads.ll 
b/test/CodeGen/AArch64/bool-loads.ll new file mode 100644 index 000000000000..5c7640bc4218 --- /dev/null +++ b/test/CodeGen/AArch64/bool-loads.ll @@ -0,0 +1,55 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +@var = global i1 0 + +define i32 @test_sextloadi32() { +; CHECK: test_sextloadi32 + + %val = load i1* @var + %ret = sext i1 %val to i32 +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var] +; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1 + + ret i32 %ret +; CHECK: ret +} + +define i64 @test_sextloadi64() { +; CHECK: test_sextloadi64 + + %val = load i1* @var + %ret = sext i1 %val to i64 +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var] +; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #1 + + ret i64 %ret +; CHECK: ret +} + +define i32 @test_zextloadi32() { +; CHECK: test_zextloadi32 + +; It's not actually necessary that "ret" is next, but as far as LLVM +; is concerned only 0 or 1 should be loadable so no extension is +; necessary. + %val = load i1* @var + %ret = zext i1 %val to i32 +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var] + + ret i32 %ret +; CHECK-NEXT: ret +} + +define i64 @test_zextloadi64() { +; CHECK: test_zextloadi64 + +; It's not actually necessary that "ret" is next, but as far as LLVM +; is concerned only 0 or 1 should be loadable so no extension is +; necessary. + %val = load i1* @var + %ret = zext i1 %val to i64 +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var] + + ret i64 %ret +; CHECK-NEXT: ret +} diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll new file mode 100644 index 000000000000..38ed4734e1b4 --- /dev/null +++ b/test/CodeGen/AArch64/breg.ll @@ -0,0 +1,17 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@stored_label = global i8* null + +define void @foo() { +; CHECK: foo: + %lab = load i8** @stored_label + indirectbr i8* %lab, [label %otherlab, label %retlab] +; CHECK: adrp {{x[0-9]+}}, stored_label +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:stored_label] +; CHECK: br {{x[0-9]+}} + +otherlab: + ret void +retlab: + ret void +} diff --git a/test/CodeGen/AArch64/callee-save.ll b/test/CodeGen/AArch64/callee-save.ll new file mode 100644 index 000000000000..c66aa5bfc510 --- /dev/null +++ b/test/CodeGen/AArch64/callee-save.ll @@ -0,0 +1,86 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var = global float 0.0 + +define void @foo() { +; CHECK: foo: + +; CHECK: stp d14, d15, [sp +; CHECK: stp d12, d13, [sp +; CHECK: stp d10, d11, [sp +; CHECK: stp d8, d9, [sp + + ; Create lots of live variables to exhaust the supply of + ; caller-saved registers + %val1 = load volatile float* @var + %val2 = load volatile float* @var + %val3 = load volatile float* @var + %val4 = load volatile float* @var + %val5 = load volatile float* @var + %val6 = load volatile float* @var + %val7 = load volatile float* @var + %val8 = load volatile float* @var + %val9 = load volatile float* @var + %val10 = load volatile float* @var + %val11 = load volatile float* @var + %val12 = load volatile float* @var + %val13 = load volatile float* @var + %val14 = load volatile float* @var + %val15 = load volatile float* @var + %val16 = load volatile float* @var + %val17 = load volatile float* @var + %val18 = load volatile float* @var + %val19 = load volatile float* @var + %val20 = load volatile float* @var + %val21 = load volatile float* @var + %val22 = load volatile float* @var + %val23 = load volatile float* @var + %val24 = load volatile float* @var + %val25 = load 
volatile float* @var + %val26 = load volatile float* @var + %val27 = load volatile float* @var + %val28 = load volatile float* @var + %val29 = load volatile float* @var + %val30 = load volatile float* @var + %val31 = load volatile float* @var + %val32 = load volatile float* @var + + store volatile float %val1, float* @var + store volatile float %val2, float* @var + store volatile float %val3, float* @var + store volatile float %val4, float* @var + store volatile float %val5, float* @var + store volatile float %val6, float* @var + store volatile float %val7, float* @var + store volatile float %val8, float* @var + store volatile float %val9, float* @var + store volatile float %val10, float* @var + store volatile float %val11, float* @var + store volatile float %val12, float* @var + store volatile float %val13, float* @var + store volatile float %val14, float* @var + store volatile float %val15, float* @var + store volatile float %val16, float* @var + store volatile float %val17, float* @var + store volatile float %val18, float* @var + store volatile float %val19, float* @var + store volatile float %val20, float* @var + store volatile float %val21, float* @var + store volatile float %val22, float* @var + store volatile float %val23, float* @var + store volatile float %val24, float* @var + store volatile float %val25, float* @var + store volatile float %val26, float* @var + store volatile float %val27, float* @var + store volatile float %val28, float* @var + store volatile float %val29, float* @var + store volatile float %val30, float* @var + store volatile float %val31, float* @var + store volatile float %val32, float* @var + +; CHECK: ldp d8, d9, [sp +; CHECK: ldp d10, d11, [sp +; CHECK: ldp d12, d13, [sp +; CHECK: ldp d14, d15, [sp + ret void +} diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll new file mode 100644 index 000000000000..4213110497d3 --- /dev/null +++ b/test/CodeGen/AArch64/compare-branch.ll @@ -0,0 +1,38 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @foo() { +; CHECK: foo: + + %val1 = load volatile i32* @var32 + %tst1 = icmp eq i32 %val1, 0 + br i1 %tst1, label %end, label %test2 +; CHECK: cbz {{w[0-9]+}}, .LBB + +test2: + %val2 = load volatile i32* @var32 + %tst2 = icmp ne i32 %val2, 0 + br i1 %tst2, label %end, label %test3 +; CHECK: cbnz {{w[0-9]+}}, .LBB + +test3: + %val3 = load volatile i64* @var64 + %tst3 = icmp eq i64 %val3, 0 + br i1 %tst3, label %end, label %test4 +; CHECK: cbz {{x[0-9]+}}, .LBB + +test4: + %val4 = load volatile i64* @var64 + %tst4 = icmp ne i64 %val4, 0 + br i1 %tst4, label %end, label %test5 +; CHECK: cbnz {{x[0-9]+}}, .LBB + +test5: + store volatile i64 %val4, i64* @var64 + ret void + +end: + ret void +}
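The point of the CBZ/CBNZ checks above is that the zero comparison and the conditional branch are folded into a single compare-and-branch instruction instead of a CMP followed by a conditional branch. Roughly, with hypothetical register and label names:

;   cmp  w8, #0            ; unfused two-instruction form
;   b.eq .LBB0_4
; versus the single instruction the test expects:
;   cbz  w8, .LBB0_4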
\ No newline at end of file diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll new file mode 100644 index 000000000000..3051cf53fdf8 --- /dev/null +++ b/test/CodeGen/AArch64/cond-sel.ll @@ -0,0 +1,213 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { +; CHECK: test_csel: + + %tst1 = icmp ugt i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, i32 42, i32 52 + store i32 %val1, i32* @var32 +; CHECK: movz [[W52:w[0-9]+]], #52 +; CHECK: movz [[W42:w[0-9]+]], #42 +; CHECK: csel {{w[0-9]+}}, [[W42]], [[W52]], hi + + %rhs64 = sext i32 %rhs32 to i64 + %tst2 = icmp sle i64 %lhs64, %rhs64 + %val2 = select i1 %tst2, i64 %lhs64, i64 %rhs64 + store i64 %val2, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], [[RHS:w[0-9]+]], sxtw +; CHECK: sxtw [[EXT_RHS:x[0-9]+]], [[RHS]] +; CHECK: csel {{x[0-9]+}}, [[LHS]], [[EXT_RHS]], le + + ret void +; CHECK: ret +} + +define void @test_floatcsel(float %lhs32, float %rhs32, double %lhs64, double %rhs64) { +; CHECK: test_floatcsel: + + %tst1 = fcmp one float %lhs32, %rhs32 +; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}} + %val1 = select i1 %tst1, i32 42, i32 52 + store i32 %val1, i32* @var32 +; CHECK: movz [[W52:w[0-9]+]], #52 +; CHECK: movz [[W42:w[0-9]+]], #42 +; CHECK: csel [[MAYBETRUE:w[0-9]+]], [[W42]], [[W52]], mi +; CHECK: csel {{w[0-9]+}}, [[W42]], [[MAYBETRUE]], gt + + + %tst2 = fcmp ueq double %lhs64, %rhs64 +; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}} + %val2 = select i1 %tst2, i64 9, i64 15 + store i64 %val2, i64* @var64 +; CHECK: movz [[CONST15:x[0-9]+]], #15 +; CHECK: movz [[CONST9:x[0-9]+]], #9 +; CHECK: csel [[MAYBETRUE:x[0-9]+]], [[CONST9]], [[CONST15]], eq +; CHECK: csel {{x[0-9]+}}, [[CONST9]], [[MAYBETRUE]], vs + + ret void +; CHECK: ret +} + + +define void @test_csinc(i32 %lhs32, i32 %rhs32, i64 %lhs64) { +; CHECK: test_csinc: + +; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls). + %tst1 = icmp ugt i32 %lhs32, %rhs32 + %inc1 = add i32 %rhs32, 1 + %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32 + store volatile i32 %val1, i32* @var32 +; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]] +; CHECK: csinc {{w[0-9]+}}, [[LHS]], [[RHS]], ls + + %rhs2 = add i32 %rhs32, 42 + %tst2 = icmp sle i32 %lhs32, %rhs2 + %inc2 = add i32 %rhs32, 1 + %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2 + store volatile i32 %val2, i32* @var32 +; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}} +; CHECK: csinc {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le + +; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls). + %rhs3 = sext i32 %rhs32 to i64 + %tst3 = icmp ugt i64 %lhs64, %rhs3 + %inc3 = add i64 %rhs3, 1 + %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64 + store volatile i64 %val3, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}} +; CHECK: csinc {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls + + %rhs4 = zext i32 %rhs32 to i64 + %tst4 = icmp sle i64 %lhs64, %rhs4 + %inc4 = add i64 %rhs4, 1 + %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4 + store volatile i64 %val4, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}} +; CHECK: csinc {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le + + ret void +; CHECK: ret +} + +define void @test_csinv(i32 %lhs32, i32 %rhs32, i64 %lhs64) { +; CHECK: test_csinv: + +; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls). 
+ %tst1 = icmp ugt i32 %lhs32, %rhs32 + %inc1 = xor i32 -1, %rhs32 + %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32 + store volatile i32 %val1, i32* @var32 +; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]] +; CHECK: csinv {{w[0-9]+}}, [[LHS]], [[RHS]], ls + + %rhs2 = add i32 %rhs32, 42 + %tst2 = icmp sle i32 %lhs32, %rhs2 + %inc2 = xor i32 -1, %rhs32 + %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2 + store volatile i32 %val2, i32* @var32 +; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}} +; CHECK: csinv {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le + +; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls). + %rhs3 = sext i32 %rhs32 to i64 + %tst3 = icmp ugt i64 %lhs64, %rhs3 + %inc3 = xor i64 -1, %rhs3 + %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64 + store volatile i64 %val3, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}} +; CHECK: csinv {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls + + %rhs4 = zext i32 %rhs32 to i64 + %tst4 = icmp sle i64 %lhs64, %rhs4 + %inc4 = xor i64 -1, %rhs4 + %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4 + store volatile i64 %val4, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}} +; CHECK: csinv {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le + + ret void +; CHECK: ret +} + +define void @test_csneg(i32 %lhs32, i32 %rhs32, i64 %lhs64) { +; CHECK: test_csneg: + +; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls). + %tst1 = icmp ugt i32 %lhs32, %rhs32 + %inc1 = sub i32 0, %rhs32 + %val1 = select i1 %tst1, i32 %inc1, i32 %lhs32 + store volatile i32 %val1, i32* @var32 +; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]] +; CHECK: csneg {{w[0-9]+}}, [[LHS]], [[RHS]], ls + + %rhs2 = add i32 %rhs32, 42 + %tst2 = icmp sle i32 %lhs32, %rhs2 + %inc2 = sub i32 0, %rhs32 + %val2 = select i1 %tst2, i32 %lhs32, i32 %inc2 + store volatile i32 %val2, i32* @var32 +; CHECK: cmp [[LHS:w[0-9]+]], {{w[0-9]+}} +; CHECK: csneg {{w[0-9]+}}, [[LHS]], {{w[0-9]+}}, le + +; Note that commuting rhs and lhs in the select changes ugt to ule (i.e. hi to ls). + %rhs3 = sext i32 %rhs32 to i64 + %tst3 = icmp ugt i64 %lhs64, %rhs3 + %inc3 = sub i64 0, %rhs3 + %val3 = select i1 %tst3, i64 %inc3, i64 %lhs64 + store volatile i64 %val3, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}} +; CHECK: csneg {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, ls + + %rhs4 = zext i32 %rhs32 to i64 + %tst4 = icmp sle i64 %lhs64, %rhs4 + %inc4 = sub i64 0, %rhs4 + %val4 = select i1 %tst4, i64 %lhs64, i64 %inc4 + store volatile i64 %val4, i64* @var64 +; CHECK: cmp [[LHS:x[0-9]+]], {{w[0-9]+}} +; CHECK: csneg {{x[0-9]+}}, [[LHS]], {{x[0-9]+}}, le + + ret void +; CHECK: ret +} + +define void @test_cset(i32 %lhs, i32 %rhs, i64 %lhs64) { +; CHECK: test_cset: + +; N.b. 
code is not optimal here (32-bit csinc would be better) but +; incoming DAG is too complex + %tst1 = icmp eq i32 %lhs, %rhs + %val1 = zext i1 %tst1 to i32 + store i32 %val1, i32* @var32 +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: csinc {{w[0-9]+}}, wzr, wzr, ne + + %rhs64 = sext i32 %rhs to i64 + %tst2 = icmp ule i64 %lhs64, %rhs64 + %val2 = zext i1 %tst2 to i64 + store i64 %val2, i64* @var64 +; CHECK: csinc {{w[0-9]+}}, wzr, wzr, hi + + ret void +; CHECK: ret +} + +define void @test_csetm(i32 %lhs, i32 %rhs, i64 %lhs64) { +; CHECK: test_csetm: + + %tst1 = icmp eq i32 %lhs, %rhs + %val1 = sext i1 %tst1 to i32 + store i32 %val1, i32* @var32 +; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: csinv {{w[0-9]+}}, wzr, wzr, ne + + %rhs64 = sext i32 %rhs to i64 + %tst2 = icmp ule i64 %lhs64, %rhs64 + %val2 = sext i1 %tst2 to i64 + store i64 %val2, i64* @var64 +; CHECK: csinv {{x[0-9]+}}, xzr, xzr, hi + + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/directcond.ll b/test/CodeGen/AArch64/directcond.ll new file mode 100644 index 000000000000..f5d57593bfad --- /dev/null +++ b/test/CodeGen/AArch64/directcond.ll @@ -0,0 +1,84 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i32 @test_select_i32(i1 %bit, i32 %a, i32 %b) { +; CHECK: test_select_i32: + %val = select i1 %bit, i32 %a, i32 %b +; CHECK: movz [[ONE:w[0-9]+]], #1 +; CHECK: tst w0, [[ONE]] +; CHECK-NEXT: csel w0, w1, w2, ne + + ret i32 %val +} + +define i64 @test_select_i64(i1 %bit, i64 %a, i64 %b) { +; CHECK: test_select_i64: + %val = select i1 %bit, i64 %a, i64 %b +; CHECK: movz [[ONE:w[0-9]+]], #1 +; CHECK: tst w0, [[ONE]] +; CHECK-NEXT: csel x0, x1, x2, ne + + ret i64 %val +} + +define float @test_select_float(i1 %bit, float %a, float %b) { +; CHECK: test_select_float: + %val = select i1 %bit, float %a, float %b +; CHECK: movz [[ONE:w[0-9]+]], #1 +; CHECK: tst w0, [[ONE]] +; CHECK-NEXT: fcsel s0, s0, s1, ne + + ret float %val +} + +define double @test_select_double(i1 %bit, double %a, double %b) { +; CHECK: test_select_double: + %val = select i1 %bit, double %a, double %b +; CHECK: movz [[ONE:w[0-9]+]], #1 +; CHECK: tst w0, [[ONE]] +; CHECK-NEXT: fcsel d0, d0, d1, ne + + ret double %val +} + +define i32 @test_brcond(i1 %bit) { +; CHECK: test_brcond: + br i1 %bit, label %true, label %false +; CHECK: tbz {{w[0-9]+}}, #0, .LBB + +true: + ret i32 0 +false: + ret i32 42 +} + +define i1 @test_setcc_float(float %lhs, float %rhs) { +; CHECK: test_setcc_float + %val = fcmp oeq float %lhs, %rhs +; CHECK: fcmp s0, s1 +; CHECK: csinc w0, wzr, wzr, ne + ret i1 %val +} + +define i1 @test_setcc_double(double %lhs, double %rhs) { +; CHECK: test_setcc_double + %val = fcmp oeq double %lhs, %rhs +; CHECK: fcmp d0, d1 +; CHECK: csinc w0, wzr, wzr, ne + ret i1 %val +} + +define i1 @test_setcc_i32(i32 %lhs, i32 %rhs) { +; CHECK: test_setcc_i32 + %val = icmp ugt i32 %lhs, %rhs +; CHECK: cmp w0, w1 +; CHECK: csinc w0, wzr, wzr, ls + ret i1 %val +} + +define i1 @test_setcc_i64(i64 %lhs, i64 %rhs) { +; CHECK: test_setcc_i64 + %val = icmp ne i64 %lhs, %rhs +; CHECK: cmp x0, x1 +; CHECK: csinc w0, wzr, wzr, eq + ret i1 %val +} diff --git a/test/CodeGen/AArch64/dp-3source.ll b/test/CodeGen/AArch64/dp-3source.ll new file mode 100644 index 000000000000..c40d3933b44b --- /dev/null +++ b/test/CodeGen/AArch64/dp-3source.ll @@ -0,0 +1,163 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i32 @test_madd32(i32 %val0, i32 %val1, i32 %val2) { +; CHECK: test_madd32: 
+ %mid = mul i32 %val1, %val2 + %res = add i32 %val0, %mid +; CHECK: madd {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i32 %res +} + +define i64 @test_madd64(i64 %val0, i64 %val1, i64 %val2) { +; CHECK: test_madd64: + %mid = mul i64 %val1, %val2 + %res = add i64 %val0, %mid +; CHECK: madd {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i32 @test_msub32(i32 %val0, i32 %val1, i32 %val2) { +; CHECK: test_msub32: + %mid = mul i32 %val1, %val2 + %res = sub i32 %val0, %mid +; CHECK: msub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i32 %res +} + +define i64 @test_msub64(i64 %val0, i64 %val1, i64 %val2) { +; CHECK: test_msub64: + %mid = mul i64 %val1, %val2 + %res = sub i64 %val0, %mid +; CHECK: msub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i64 @test_smaddl(i64 %acc, i32 %val1, i32 %val2) { +; CHECK: test_smaddl: + %ext1 = sext i32 %val1 to i64 + %ext2 = sext i32 %val2 to i64 + %prod = mul i64 %ext1, %ext2 + %res = add i64 %acc, %prod +; CHECK: smaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i64 @test_smsubl(i64 %acc, i32 %val1, i32 %val2) { +; CHECK: test_smsubl: + %ext1 = sext i32 %val1 to i64 + %ext2 = sext i32 %val2 to i64 + %prod = mul i64 %ext1, %ext2 + %res = sub i64 %acc, %prod +; CHECK: smsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i64 @test_umaddl(i64 %acc, i32 %val1, i32 %val2) { +; CHECK: test_umaddl: + %ext1 = zext i32 %val1 to i64 + %ext2 = zext i32 %val2 to i64 + %prod = mul i64 %ext1, %ext2 + %res = add i64 %acc, %prod +; CHECK: umaddl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i64 @test_umsubl(i64 %acc, i32 %val1, i32 %val2) { +; CHECK: test_umsubl: + %ext1 = zext i32 %val1 to i64 + %ext2 = zext i32 %val2 to i64 + %prod = mul i64 %ext1, %ext2 + %res = sub i64 %acc, %prod +; CHECK: umsubl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i64 @test_smulh(i64 %lhs, i64 %rhs) { +; CHECK: test_smulh: + %ext1 = sext i64 %lhs to i128 + %ext2 = sext i64 %rhs to i128 + %res = mul i128 %ext1, %ext2 + %high = lshr i128 %res, 64 + %val = trunc i128 %high to i64 +; CHECK: smulh {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + ret i64 %val +} + +define i64 @test_umulh(i64 %lhs, i64 %rhs) { +; CHECK: test_umulh: + %ext1 = zext i64 %lhs to i128 + %ext2 = zext i64 %rhs to i128 + %res = mul i128 %ext1, %ext2 + %high = lshr i128 %res, 64 + %val = trunc i128 %high to i64 +; CHECK: umulh {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + ret i64 %val +} + +define i32 @test_mul32(i32 %lhs, i32 %rhs) { +; CHECK: test_mul32: + %res = mul i32 %lhs, %rhs +; CHECK: mul {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i32 %res +} + +define i64 @test_mul64(i64 %lhs, i64 %rhs) { +; CHECK: test_mul64: + %res = mul i64 %lhs, %rhs +; CHECK: mul {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i32 @test_mneg32(i32 %lhs, i32 %rhs) { +; CHECK: test_mneg32: + %prod = mul i32 %lhs, %rhs + %res = sub i32 0, %prod +; CHECK: mneg {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i32 %res +} + +define i64 @test_mneg64(i64 %lhs, i64 %rhs) { +; CHECK: test_mneg64: + %prod = mul i64 %lhs, %rhs + %res = sub i64 0, %prod +; CHECK: mneg {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + ret i64 %res +} + +define i64 @test_smull(i32 %lhs, i32 %rhs) { +; CHECK: test_smull: + %ext1 = sext i32 %lhs to i64 + %ext2 = sext i32 %rhs to i64 + %res = mul i64 %ext1, %ext2 +; CHECK: smull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret 
i64 %res +} + +define i64 @test_umull(i32 %lhs, i32 %rhs) { +; CHECK: test_umull: + %ext1 = zext i32 %lhs to i64 + %ext2 = zext i32 %rhs to i64 + %res = mul i64 %ext1, %ext2 +; CHECK: umull {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i64 %res +} + +define i64 @test_smnegl(i32 %lhs, i32 %rhs) { +; CHECK: test_smnegl: + %ext1 = sext i32 %lhs to i64 + %ext2 = sext i32 %rhs to i64 + %prod = mul i64 %ext1, %ext2 + %res = sub i64 0, %prod +; CHECK: smnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i64 %res +} + +define i64 @test_umnegl(i32 %lhs, i32 %rhs) { +; CHECK: test_umnegl: + %ext1 = zext i32 %lhs to i64 + %ext2 = zext i32 %rhs to i64 + %prod = mul i64 %ext1, %ext2 + %res = sub i64 0, %prod +; CHECK: umnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + ret i64 %res +} diff --git a/test/CodeGen/AArch64/dp1.ll b/test/CodeGen/AArch64/dp1.ll new file mode 100644 index 000000000000..83aa8b4f6631 --- /dev/null +++ b/test/CodeGen/AArch64/dp1.ll @@ -0,0 +1,152 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @rev_i32() { +; CHECK: rev_i32: + %val0_tmp = load i32* @var32 + %val1_tmp = call i32 @llvm.bswap.i32(i32 %val0_tmp) +; CHECK: rev {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val1_tmp, i32* @var32 + ret void +} + +define void @rev_i64() { +; CHECK: rev_i64: + %val0_tmp = load i64* @var64 + %val1_tmp = call i64 @llvm.bswap.i64(i64 %val0_tmp) +; CHECK: rev {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val1_tmp, i64* @var64 + ret void +} + +define void @rev32_i64() { +; CHECK: rev32_i64: + %val0_tmp = load i64* @var64 + %val1_tmp = shl i64 %val0_tmp, 32 + %val5_tmp = sub i64 64, 32 + %val2_tmp = lshr i64 %val0_tmp, %val5_tmp + %val3_tmp = or i64 %val1_tmp, %val2_tmp + %val4_tmp = call i64 @llvm.bswap.i64(i64 %val3_tmp) +; CHECK: rev32 {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64 + ret void +} + +define void @rev16_i32() { +; CHECK: rev16_i32: + %val0_tmp = load i32* @var32 + %val1_tmp = shl i32 %val0_tmp, 16 + %val2_tmp = lshr i32 %val0_tmp, 16 + %val3_tmp = or i32 %val1_tmp, %val2_tmp + %val4_tmp = call i32 @llvm.bswap.i32(i32 %val3_tmp) +; CHECK: rev16 {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32 + ret void +} + +define void @clz_zerodef_i32() { +; CHECK: clz_zerodef_i32: + %val0_tmp = load i32* @var32 + %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 0) +; CHECK: clz {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32 + ret void +} + +define void @clz_zerodef_i64() { +; CHECK: clz_zerodef_i64: + %val0_tmp = load i64* @var64 + %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 0) +; CHECK: clz {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64 + ret void +} + +define void @clz_zeroundef_i32() { +; CHECK: clz_zeroundef_i32: + %val0_tmp = load i32* @var32 + %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 1) +; CHECK: clz {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32 + ret void +} + +define void @clz_zeroundef_i64() { +; CHECK: clz_zeroundef_i64: + %val0_tmp = load i64* @var64 + %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 1) +; CHECK: clz {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64 + ret void +} + +define void @cttz_zerodef_i32() { +; CHECK: cttz_zerodef_i32: + %val0_tmp = load i32* @var32 + %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 0) +; CHECK: rbit [[REVERSED:w[0-9]+]], {{w[0-9]+}} +; CHECK: clz {{w[0-9]+}}, [[REVERSED]] + store 
volatile i32 %val4_tmp, i32* @var32 + ret void +} + +define void @cttz_zerodef_i64() { +; CHECK: cttz_zerodef_i64: + %val0_tmp = load i64* @var64 + %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 0) +; CHECK: rbit [[REVERSED:x[0-9]+]], {{x[0-9]+}} +; CHECK: clz {{x[0-9]+}}, [[REVERSED]] + store volatile i64 %val4_tmp, i64* @var64 + ret void +} + +define void @cttz_zeroundef_i32() { +; CHECK: cttz_zeroundef_i32: + %val0_tmp = load i32* @var32 + %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 1) +; CHECK: rbit [[REVERSED:w[0-9]+]], {{w[0-9]+}} +; CHECK: clz {{w[0-9]+}}, [[REVERSED]] + store volatile i32 %val4_tmp, i32* @var32 + ret void +} + +define void @cttz_zeroundef_i64() { +; CHECK: cttz_zeroundef_i64: + %val0_tmp = load i64* @var64 + %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 1) +; CHECK: rbit [[REVERSED:x[0-9]+]], {{x[0-9]+}} +; CHECK: clz {{x[0-9]+}}, [[REVERSED]] + store volatile i64 %val4_tmp, i64* @var64 + ret void +} + +; These two are just compilation tests really: the operation's set to Expand in +; ISelLowering. +define void @ctpop_i32() { +; CHECK: ctpop_i32: + %val0_tmp = load i32* @var32 + %val4_tmp = call i32 @llvm.ctpop.i32(i32 %val0_tmp) + store volatile i32 %val4_tmp, i32* @var32 + ret void +} + +define void @ctpop_i64() { +; CHECK: ctpop_i64: + %val0_tmp = load i64* @var64 + %val4_tmp = call i64 @llvm.ctpop.i64(i64 %val0_tmp) + store volatile i64 %val4_tmp, i64* @var64 + ret void +} + + +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) +declare i32 @llvm.ctlz.i32 (i32, i1) +declare i64 @llvm.ctlz.i64 (i64, i1) +declare i32 @llvm.cttz.i32 (i32, i1) +declare i64 @llvm.cttz.i64 (i64, i1) +declare i32 @llvm.ctpop.i32 (i32) +declare i64 @llvm.ctpop.i64 (i64) + diff --git a/test/CodeGen/AArch64/dp2.ll b/test/CodeGen/AArch64/dp2.ll new file mode 100644 index 000000000000..4c740f6b8623 --- /dev/null +++ b/test/CodeGen/AArch64/dp2.ll @@ -0,0 +1,169 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32_0 = global i32 0 +@var32_1 = global i32 0 +@var64_0 = global i64 0 +@var64_1 = global i64 0 + +define void @rorv_i64() { +; CHECK: rorv_i64: + %val0_tmp = load i64* @var64_0 + %val1_tmp = load i64* @var64_1 + %val2_tmp = sub i64 64, %val1_tmp + %val3_tmp = shl i64 %val0_tmp, %val2_tmp + %val4_tmp = lshr i64 %val0_tmp, %val1_tmp + %val5_tmp = or i64 %val3_tmp, %val4_tmp +; CHECK: ror {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val5_tmp, i64* @var64_0 + ret void +} + +define void @asrv_i64() { +; CHECK: asrv_i64: + %val0_tmp = load i64* @var64_0 + %val1_tmp = load i64* @var64_1 + %val4_tmp = ashr i64 %val0_tmp, %val1_tmp +; CHECK: asr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64_1 + ret void +} + +define void @lsrv_i64() { +; CHECK: lsrv_i64: + %val0_tmp = load i64* @var64_0 + %val1_tmp = load i64* @var64_1 + %val4_tmp = lshr i64 %val0_tmp, %val1_tmp +; CHECK: lsr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64_0 + ret void +} + +define void @lslv_i64() { +; CHECK: lslv_i64: + %val0_tmp = load i64* @var64_0 + %val1_tmp = load i64* @var64_1 + %val4_tmp = shl i64 %val0_tmp, %val1_tmp +; CHECK: lsl {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64_1 + ret void +} + +define void @udiv_i64() { +; CHECK: udiv_i64: + %val0_tmp = load i64* @var64_0 + %val1_tmp = load i64* @var64_1 + %val4_tmp = udiv i64 %val0_tmp, %val1_tmp +; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store 
volatile i64 %val4_tmp, i64* @var64_0 + ret void +} + +define void @sdiv_i64() { +; CHECK: sdiv_i64: + %val0_tmp = load i64* @var64_0 + %val1_tmp = load i64* @var64_1 + %val4_tmp = sdiv i64 %val0_tmp, %val1_tmp +; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %val4_tmp, i64* @var64_1 + ret void +} + + +define void @lsrv_i32() { +; CHECK: lsrv_i32: + %val0_tmp = load i32* @var32_0 + %val1_tmp = load i32* @var32_1 + %val2_tmp = add i32 1, %val1_tmp + %val4_tmp = lshr i32 %val0_tmp, %val2_tmp +; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32_0 + ret void +} + +define void @lslv_i32() { +; CHECK: lslv_i32: + %val0_tmp = load i32* @var32_0 + %val1_tmp = load i32* @var32_1 + %val2_tmp = add i32 1, %val1_tmp + %val4_tmp = shl i32 %val0_tmp, %val2_tmp +; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32_1 + ret void +} + +define void @rorv_i32() { +; CHECK: rorv_i32: + %val0_tmp = load i32* @var32_0 + %val6_tmp = load i32* @var32_1 + %val1_tmp = add i32 1, %val6_tmp + %val2_tmp = sub i32 32, %val1_tmp + %val3_tmp = shl i32 %val0_tmp, %val2_tmp + %val4_tmp = lshr i32 %val0_tmp, %val1_tmp + %val5_tmp = or i32 %val3_tmp, %val4_tmp +; CHECK: ror {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val5_tmp, i32* @var32_0 + ret void +} + +define void @asrv_i32() { +; CHECK: asrv_i32: + %val0_tmp = load i32* @var32_0 + %val1_tmp = load i32* @var32_1 + %val2_tmp = add i32 1, %val1_tmp + %val4_tmp = ashr i32 %val0_tmp, %val2_tmp +; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32_1 + ret void +} + +define void @sdiv_i32() { +; CHECK: sdiv_i32: + %val0_tmp = load i32* @var32_0 + %val1_tmp = load i32* @var32_1 + %val4_tmp = sdiv i32 %val0_tmp, %val1_tmp +; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32_1 + ret void +} + +define void @udiv_i32() { +; CHECK: udiv_i32: + %val0_tmp = load i32* @var32_0 + %val1_tmp = load i32* @var32_1 + %val4_tmp = udiv i32 %val0_tmp, %val1_tmp +; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %val4_tmp, i32* @var32_0 + ret void +} + +; The point of this test is that we may not actually see (shl GPR32:$Val, (zext GPR32:$Val2)) +; in the DAG (the RHS may be natively 64-bit), but we should still use the lsl instructions. +define i32 @test_lsl32() { +; CHECK: test_lsl32: + + %val = load i32* @var32_0 + %ret = shl i32 1, %val +; CHECK: lsl {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + + ret i32 %ret +} + +define i32 @test_lsr32() { +; CHECK: test_lsr32: + + %val = load i32* @var32_0 + %ret = lshr i32 1, %val +; CHECK: lsr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + + ret i32 %ret +} + +define i32 @test_asr32(i32 %in) { +; CHECK: test_asr32: + + %val = load i32* @var32_0 + %ret = ashr i32 %in, %val +; CHECK: asr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + + ret i32 %ret +} diff --git a/test/CodeGen/AArch64/elf-extern.ll b/test/CodeGen/AArch64/elf-extern.ll new file mode 100644 index 000000000000..ee89d8d94ba4 --- /dev/null +++ b/test/CodeGen/AArch64/elf-extern.ll @@ -0,0 +1,21 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | elf-dump | FileCheck %s + +; External symbols are a different concept to global variables but should still +; get relocations and so on when used. 
+ +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) + +define i32 @check_extern() { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0) + ret i32 0 +} + +; CHECK: .rela.text +; CHECK: ('r_sym', 0x00000009) +; CHECK-NEXT: ('r_type', 0x0000011b) + +; CHECK: .symtab +; CHECK: Symbol 9 +; CHECK-NEXT: memcpy + + diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll new file mode 100644 index 000000000000..3d3d8676818a --- /dev/null +++ b/test/CodeGen/AArch64/extern-weak.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -o - < %s | FileCheck %s + +declare extern_weak i32 @var() + +define i32()* @foo() { +; The usual ADRP/ADD pair can't be used for a weak reference because it must +; evaluate to 0 if the symbol is undefined. We use a litpool entry. + ret i32()* @var +; CHECK: .LCPI0_0: +; CHECK-NEXT: .xword var + +; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:.LCPI0_0] + +} + + +@arr_var = extern_weak global [10 x i32] + +define i32* @bar() { + %addr = getelementptr [10 x i32]* @arr_var, i32 0, i32 5 +; CHECK: .LCPI1_0: +; CHECK-NEXT: .xword arr_var + +; CHECK: ldr [[BASE:x[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI1_0] +; CHECK: add x0, [[BASE]], #20 + ret i32* %addr +} + +@defined_weak_var = internal unnamed_addr global i32 0 + +define i32* @wibble() { + ret i32* @defined_weak_var +; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var +; CHECK: add x0, [[BASE]], #:lo12:defined_weak_var +}
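The litpool requirement above comes from the usual null-test idiom on weak references: an undefined extern_weak symbol must compare equal to null at run time, which a page-based ADRP/ADD materialisation cannot guarantee. A minimal standalone sketch of that idiom (hypothetical symbol name, not part of this test):

declare extern_weak void @maybe_missing()

define i1 @have_it() {
  ; Only meaningful if an undefined weak symbol really resolves to 0.
  %present = icmp ne void()* @maybe_missing, null
  ret i1 %present
}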
\ No newline at end of file diff --git a/test/CodeGen/AArch64/extract.ll b/test/CodeGen/AArch64/extract.ll new file mode 100644 index 000000000000..06267816a4e1 --- /dev/null +++ b/test/CodeGen/AArch64/extract.ll @@ -0,0 +1,57 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i64 @ror_i64(i64 %in) { +; CHECK: ror_i64: + %left = shl i64 %in, 19 + %right = lshr i64 %in, 45 + %val5 = or i64 %left, %right +; CHECK: extr {{x[0-9]+}}, x0, x0, #45 + ret i64 %val5 +} + +define i32 @ror_i32(i32 %in) { +; CHECK: ror_i32: + %left = shl i32 %in, 9 + %right = lshr i32 %in, 23 + %val5 = or i32 %left, %right +; CHECK: extr {{w[0-9]+}}, w0, w0, #23 + ret i32 %val5 +} + +define i32 @extr_i32(i32 %lhs, i32 %rhs) { +; CHECK: extr_i32: + %left = shl i32 %lhs, 6 + %right = lshr i32 %rhs, 26 + %val = or i32 %left, %right + ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use + ; something other than w0 and w1. +; CHECK: extr {{w[0-9]+}}, w0, w1, #26 + + ret i32 %val +} + +define i64 @extr_i64(i64 %lhs, i64 %rhs) { +; CHECK: extr_i64: + %right = lshr i64 %rhs, 40 + %left = shl i64 %lhs, 24 + %val = or i64 %right, %left + ; Order of lhs and rhs matters here. Regalloc would have to be very odd to use + ; something other than w0 and w1. +; CHECK: extr {{x[0-9]+}}, x0, x1, #40 + + ret i64 %val +} + +; Regression test: a bad experimental pattern crept into git which optimised +; this pattern to a single EXTR. +define i32 @extr_regress(i32 %a, i32 %b) { +; CHECK: extr_regress: + + %sh1 = shl i32 %a, 14 + %sh2 = lshr i32 %b, 14 + %val = or i32 %sh2, %sh1 +; CHECK-NOT: extr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, #{{[0-9]+}} + + ret i32 %val +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll new file mode 100644 index 000000000000..e40aa3033bde --- /dev/null +++ b/test/CodeGen/AArch64/fastcc-reserved.ll @@ -0,0 +1,58 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s + +; This test is designed to be run in the situation where the +; call-frame is not reserved (hence disable-fp-elim), but where +; callee-pop can occur (hence tailcallopt). + +declare fastcc void @will_pop([8 x i32], i32 %val) + +define fastcc void @foo(i32 %in) { +; CHECK: foo: + + %addr = alloca i8, i32 %in + +; Normal frame setup stuff: +; CHECK: sub sp, sp, +; CHECK: stp x29, x30 + +; Reserve space for call-frame: +; CHECK: sub sp, sp, #16 + + call fastcc void @will_pop([8 x i32] undef, i32 42) +; CHECK: bl will_pop + +; Since @will_pop is fastcc with tailcallopt, it will put the stack +; back where it needs to be, we shouldn't duplicate that +; CHECK-NOT: sub sp, sp, #16 +; CHECK-NOT: add sp, sp, + +; CHECK: ldp x29, x30 +; CHECK: add sp, sp, + ret void +} + +declare void @wont_pop([8 x i32], i32 %val) + +define void @foo1(i32 %in) { +; CHECK: foo1: + + %addr = alloca i8, i32 %in +; Normal frame setup again +; CHECK: sub sp, sp, +; CHECK: stp x29, x30 + +; Reserve space for call-frame +; CHECK: sub sp, sp, #16 + + call void @wont_pop([8 x i32] undef, i32 42) +; CHECK: bl wont_pop + +; This time we *do* need to unreserve the call-frame +; CHECK: add sp, sp, #16 + +; Check for epilogue (primarily to make sure sp spotted above wasn't +; part of it). 
+; CHECK: ldp x29, x30 +; CHECK: add sp, sp, + ret void +} diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll new file mode 100644 index 000000000000..41cde94edc1c --- /dev/null +++ b/test/CodeGen/AArch64/fastcc.ll @@ -0,0 +1,123 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +; Without tailcallopt fastcc still means the caller cleans up the +; stack, so try to make sure this is respected. + +define fastcc void @func_stack0() { +; CHECK: func_stack0: +; CHECK: sub sp, sp, #48 + +; CHECK-TAIL: func_stack0: +; CHECK-TAIL: sub sp, sp, #48 + + + call fastcc void @func_stack8([8 x i32] undef, i32 42) +; CHECK: bl func_stack8 +; CHECK-NOT: sub sp, sp, + +; CHECK-TAIL: bl func_stack8 +; CHECK-TAIL: sub sp, sp, #16 + + + call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) +; CHECK: bl func_stack32 +; CHECK-NOT: sub sp, sp, + +; CHECK-TAIL: bl func_stack32 +; CHECK-TAIL: sub sp, sp, #32 + + + call fastcc void @func_stack0() +; CHECK: bl func_stack0 +; CHECK-NOT: sub sp, sp + +; CHECK-TAIL: bl func_stack0 +; CHECK-TAIL-NOT: sub sp, sp + + ret void +; CHECK: add sp, sp, #48 +; CHECK-NEXT: ret + +; CHECK-TAIL: add sp, sp, #48 +; CHECK-TAIL-NEXT: ret + +} + +define fastcc void @func_stack8([8 x i32], i32 %stacked) { +; CHECK: func_stack8: +; CHECK: sub sp, sp, #48 + +; CHECK-TAIL: func_stack8: +; CHECK-TAIL: sub sp, sp, #48 + + + call fastcc void @func_stack8([8 x i32] undef, i32 42) +; CHECK: bl func_stack8 +; CHECK-NOT: sub sp, sp, + +; CHECK-TAIL: bl func_stack8 +; CHECK-TAIL: sub sp, sp, #16 + + + call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) +; CHECK: bl func_stack32 +; CHECK-NOT: sub sp, sp, + +; CHECK-TAIL: bl func_stack32 +; CHECK-TAIL: sub sp, sp, #32 + + + call fastcc void @func_stack0() +; CHECK: bl func_stack0 +; CHECK-NOT: sub sp, sp + +; CHECK-TAIL: bl func_stack0 +; CHECK-TAIL-NOT: sub sp, sp + + ret void +; CHECK: add sp, sp, #48 +; CHECK-NEXT: ret + +; CHECK-TAIL: add sp, sp, #64 +; CHECK-TAIL-NEXT: ret +} + +define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) { +; CHECK: func_stack32: +; CHECK: sub sp, sp, #48 + +; CHECK-TAIL: func_stack32: +; CHECK-TAIL: sub sp, sp, #48 + + + call fastcc void @func_stack8([8 x i32] undef, i32 42) +; CHECK: bl func_stack8 +; CHECK-NOT: sub sp, sp, + +; CHECK-TAIL: bl func_stack8 +; CHECK-TAIL: sub sp, sp, #16 + + + call fastcc void @func_stack32([8 x i32] undef, i128 0, i128 9) +; CHECK: bl func_stack32 +; CHECK-NOT: sub sp, sp, + +; CHECK-TAIL: bl func_stack32 +; CHECK-TAIL: sub sp, sp, #32 + + + call fastcc void @func_stack0() +; CHECK: bl func_stack0 +; CHECK-NOT: sub sp, sp + +; CHECK-TAIL: bl func_stack0 +; CHECK-TAIL-NOT: sub sp, sp + + ret void +; CHECK: add sp, sp, #48 +; CHECK-NEXT: ret + +; CHECK-TAIL: add sp, sp, #80 +; CHECK-TAIL-NEXT: ret +} diff --git a/test/CodeGen/AArch64/fcmp.ll b/test/CodeGen/AArch64/fcmp.ll new file mode 100644 index 000000000000..ad4a903c9b25 --- /dev/null +++ b/test/CodeGen/AArch64/fcmp.ll @@ -0,0 +1,81 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +declare void @bar(i32) + +define void @test_float(float %a, float %b) { +; CHECK: test_float: + + %tst1 = fcmp oeq float %a, %b + br i1 %tst1, label %end, label %t2 +; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}} +; CHECK: b.eq .L + +t2: + %tst2 = fcmp une float %b, 0.0 + br i1 %tst2, 
label %t3, label %end +; CHECK: fcmp {{s[0-9]+}}, #0.0 +; CHECK: b.eq .L + + +t3: +; This test can't be implemented with just one A64 conditional +; branch. LLVM converts "ordered and not equal" to "unordered or +; equal" before instruction selection, which is what we currently +; test. Obviously, other sequences are valid. + %tst3 = fcmp one float %a, %b + br i1 %tst3, label %t4, label %end +; CHECK: fcmp {{s[0-9]+}}, {{s[0-9]+}} +; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: b.vs .[[T4]] +t4: + %tst4 = fcmp uge float %a, -0.0 + br i1 %tst4, label %t5, label %end +; CHECK-NOT: fcmp {{s[0-9]+}}, #0.0 +; CHECK: b.mi .LBB + +t5: + call void @bar(i32 0) + ret void +end: + ret void + +} + +define void @test_double(double %a, double %b) { +; CHECK: test_double: + + %tst1 = fcmp oeq double %a, %b + br i1 %tst1, label %end, label %t2 +; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: b.eq .L + +t2: + %tst2 = fcmp une double %b, 0.0 + br i1 %tst2, label %t3, label %end +; CHECK: fcmp {{d[0-9]+}}, #0.0 +; CHECK: b.eq .L + + +t3: +; This test can't be implemented with just one A64 conditional +; branch. LLVM converts "ordered and not equal" to "unordered or +; equal" before instruction selection, which is what we currently +; test. Obviously, other sequences are valid. + %tst3 = fcmp one double %a, %b + br i1 %tst3, label %t4, label %end +; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}} +; CHECK-NEXT: b.eq .[[T4:LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: b.vs .[[T4]] +t4: + %tst4 = fcmp uge double %a, -0.0 + br i1 %tst4, label %t5, label %end +; CHECK-NOT: fcmp {{d[0-9]+}}, #0.0 +; CHECK: b.mi .LBB + +t5: + call void @bar(i32 0) + ret void +end: + ret void + +} diff --git a/test/CodeGen/AArch64/fcvt-fixed.ll b/test/CodeGen/AArch64/fcvt-fixed.ll new file mode 100644 index 000000000000..0f7b95b2a48f --- /dev/null +++ b/test/CodeGen/AArch64/fcvt-fixed.ll @@ -0,0 +1,191 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_fcvtzs(float %flt, double %dbl) { +; CHECK: test_fcvtzs: + + %fix1 = fmul float %flt, 128.0 + %cvt1 = fptosi float %fix1 to i32 +; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}, #7 + store volatile i32 %cvt1, i32* @var32 + + %fix2 = fmul float %flt, 4294967296.0 + %cvt2 = fptosi float %fix2 to i32 +; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}, #32 + store volatile i32 %cvt2, i32* @var32 + + %fix3 = fmul float %flt, 128.0 + %cvt3 = fptosi float %fix3 to i64 +; CHECK: fcvtzs {{x[0-9]+}}, {{s[0-9]+}}, #7 + store volatile i64 %cvt3, i64* @var64 + + %fix4 = fmul float %flt, 18446744073709551616.0 + %cvt4 = fptosi float %fix4 to i64 +; CHECK: fcvtzs {{x[0-9]+}}, {{s[0-9]+}}, #64 + store volatile i64 %cvt4, i64* @var64 + + %fix5 = fmul double %dbl, 128.0 + %cvt5 = fptosi double %fix5 to i32 +; CHECK: fcvtzs {{w[0-9]+}}, {{d[0-9]+}}, #7 + store volatile i32 %cvt5, i32* @var32 + + %fix6 = fmul double %dbl, 4294967296.0 + %cvt6 = fptosi double %fix6 to i32 +; CHECK: fcvtzs {{w[0-9]+}}, {{d[0-9]+}}, #32 + store volatile i32 %cvt6, i32* @var32 + + %fix7 = fmul double %dbl, 128.0 + %cvt7 = fptosi double %fix7 to i64 +; CHECK: fcvtzs {{x[0-9]+}}, {{d[0-9]+}}, #7 + store volatile i64 %cvt7, i64* @var64 + + %fix8 = fmul double %dbl, 18446744073709551616.0 + %cvt8 = fptosi double %fix8 to i64 +; CHECK: fcvtzs {{x[0-9]+}}, {{d[0-9]+}}, #64 + store volatile i64 %cvt8, i64* @var64 + + ret void +} + +define void @test_fcvtzu(float %flt, double %dbl) { +; CHECK: test_fcvtzu: + + %fix1 = fmul float %flt, 128.0 + %cvt1 
= fptoui float %fix1 to i32 +; CHECK: fcvtzu {{w[0-9]+}}, {{s[0-9]+}}, #7 + store volatile i32 %cvt1, i32* @var32 + + %fix2 = fmul float %flt, 4294967296.0 + %cvt2 = fptoui float %fix2 to i32 +; CHECK: fcvtzu {{w[0-9]+}}, {{s[0-9]+}}, #32 + store volatile i32 %cvt2, i32* @var32 + + %fix3 = fmul float %flt, 128.0 + %cvt3 = fptoui float %fix3 to i64 +; CHECK: fcvtzu {{x[0-9]+}}, {{s[0-9]+}}, #7 + store volatile i64 %cvt3, i64* @var64 + + %fix4 = fmul float %flt, 18446744073709551616.0 + %cvt4 = fptoui float %fix4 to i64 +; CHECK: fcvtzu {{x[0-9]+}}, {{s[0-9]+}}, #64 + store volatile i64 %cvt4, i64* @var64 + + %fix5 = fmul double %dbl, 128.0 + %cvt5 = fptoui double %fix5 to i32 +; CHECK: fcvtzu {{w[0-9]+}}, {{d[0-9]+}}, #7 + store volatile i32 %cvt5, i32* @var32 + + %fix6 = fmul double %dbl, 4294967296.0 + %cvt6 = fptoui double %fix6 to i32 +; CHECK: fcvtzu {{w[0-9]+}}, {{d[0-9]+}}, #32 + store volatile i32 %cvt6, i32* @var32 + + %fix7 = fmul double %dbl, 128.0 + %cvt7 = fptoui double %fix7 to i64 +; CHECK: fcvtzu {{x[0-9]+}}, {{d[0-9]+}}, #7 + store volatile i64 %cvt7, i64* @var64 + + %fix8 = fmul double %dbl, 18446744073709551616.0 + %cvt8 = fptoui double %fix8 to i64 +; CHECK: fcvtzu {{x[0-9]+}}, {{d[0-9]+}}, #64 + store volatile i64 %cvt8, i64* @var64 + + ret void +} + +@varfloat = global float 0.0 +@vardouble = global double 0.0 + +define void @test_scvtf(i32 %int, i64 %long) { +; CHECK: test_scvtf: + + %cvt1 = sitofp i32 %int to float + %fix1 = fdiv float %cvt1, 128.0 +; CHECK: scvtf {{s[0-9]+}}, {{w[0-9]+}}, #7 + store volatile float %fix1, float* @varfloat + + %cvt2 = sitofp i32 %int to float + %fix2 = fdiv float %cvt2, 4294967296.0 +; CHECK: scvtf {{s[0-9]+}}, {{w[0-9]+}}, #32 + store volatile float %fix2, float* @varfloat + + %cvt3 = sitofp i64 %long to float + %fix3 = fdiv float %cvt3, 128.0 +; CHECK: scvtf {{s[0-9]+}}, {{x[0-9]+}}, #7 + store volatile float %fix3, float* @varfloat + + %cvt4 = sitofp i64 %long to float + %fix4 = fdiv float %cvt4, 18446744073709551616.0 +; CHECK: scvtf {{s[0-9]+}}, {{x[0-9]+}}, #64 + store volatile float %fix4, float* @varfloat + + %cvt5 = sitofp i32 %int to double + %fix5 = fdiv double %cvt5, 128.0 +; CHECK: scvtf {{d[0-9]+}}, {{w[0-9]+}}, #7 + store volatile double %fix5, double* @vardouble + + %cvt6 = sitofp i32 %int to double + %fix6 = fdiv double %cvt6, 4294967296.0 +; CHECK: scvtf {{d[0-9]+}}, {{w[0-9]+}}, #32 + store volatile double %fix6, double* @vardouble + + %cvt7 = sitofp i64 %long to double + %fix7 = fdiv double %cvt7, 128.0 +; CHECK: scvtf {{d[0-9]+}}, {{x[0-9]+}}, #7 + store volatile double %fix7, double* @vardouble + + %cvt8 = sitofp i64 %long to double + %fix8 = fdiv double %cvt8, 18446744073709551616.0 +; CHECK: scvtf {{d[0-9]+}}, {{x[0-9]+}}, #64 + store volatile double %fix8, double* @vardouble + + ret void +} + +define void @test_ucvtf(i32 %int, i64 %long) { +; CHECK: test_ucvtf: + + %cvt1 = uitofp i32 %int to float + %fix1 = fdiv float %cvt1, 128.0 +; CHECK: ucvtf {{s[0-9]+}}, {{w[0-9]+}}, #7 + store volatile float %fix1, float* @varfloat + + %cvt2 = uitofp i32 %int to float + %fix2 = fdiv float %cvt2, 4294967296.0 +; CHECK: ucvtf {{s[0-9]+}}, {{w[0-9]+}}, #32 + store volatile float %fix2, float* @varfloat + + %cvt3 = uitofp i64 %long to float + %fix3 = fdiv float %cvt3, 128.0 +; CHECK: ucvtf {{s[0-9]+}}, {{x[0-9]+}}, #7 + store volatile float %fix3, float* @varfloat + + %cvt4 = uitofp i64 %long to float + %fix4 = fdiv float %cvt4, 18446744073709551616.0 +; CHECK: ucvtf {{s[0-9]+}}, {{x[0-9]+}}, #64 + store volatile float 
%fix4, float* @varfloat + + %cvt5 = uitofp i32 %int to double + %fix5 = fdiv double %cvt5, 128.0 +; CHECK: ucvtf {{d[0-9]+}}, {{w[0-9]+}}, #7 + store volatile double %fix5, double* @vardouble + + %cvt6 = uitofp i32 %int to double + %fix6 = fdiv double %cvt6, 4294967296.0 +; CHECK: ucvtf {{d[0-9]+}}, {{w[0-9]+}}, #32 + store volatile double %fix6, double* @vardouble + + %cvt7 = uitofp i64 %long to double + %fix7 = fdiv double %cvt7, 128.0 +; CHECK: ucvtf {{d[0-9]+}}, {{x[0-9]+}}, #7 + store volatile double %fix7, double* @vardouble + + %cvt8 = uitofp i64 %long to double + %fix8 = fdiv double %cvt8, 18446744073709551616.0 +; CHECK: ucvtf {{d[0-9]+}}, {{x[0-9]+}}, #64 + store volatile double %fix8, double* @vardouble + + ret void +} diff --git a/test/CodeGen/AArch64/fcvt-int.ll b/test/CodeGen/AArch64/fcvt-int.ll new file mode 100644 index 000000000000..c771d683a99c --- /dev/null +++ b/test/CodeGen/AArch64/fcvt-int.ll @@ -0,0 +1,151 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i32 @test_floattoi32(float %in) { +; CHECK: test_floattoi32: + + %signed = fptosi float %in to i32 + %unsigned = fptoui float %in to i32 +; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{s[0-9]+}} +; CHECK: fcvtzs [[SIG:w[0-9]+]], {{s[0-9]+}} + + %res = sub i32 %signed, %unsigned +; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]] + + ret i32 %res +; CHECK: ret +} + +define i32 @test_doubletoi32(double %in) { +; CHECK: test_doubletoi32: + + %signed = fptosi double %in to i32 + %unsigned = fptoui double %in to i32 +; CHECK: fcvtzu [[UNSIG:w[0-9]+]], {{d[0-9]+}} +; CHECK: fcvtzs [[SIG:w[0-9]+]], {{d[0-9]+}} + + %res = sub i32 %signed, %unsigned +; CHECK: sub {{w[0-9]+}}, [[SIG]], [[UNSIG]] + + ret i32 %res +; CHECK: ret +} + +define i64 @test_floattoi64(float %in) { +; CHECK: test_floattoi64: + + %signed = fptosi float %in to i64 + %unsigned = fptoui float %in to i64 +; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{s[0-9]+}} +; CHECK: fcvtzs [[SIG:x[0-9]+]], {{s[0-9]+}} + + %res = sub i64 %signed, %unsigned +; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]] + + ret i64 %res +; CHECK: ret +} + +define i64 @test_doubletoi64(double %in) { +; CHECK: test_doubletoi64: + + %signed = fptosi double %in to i64 + %unsigned = fptoui double %in to i64 +; CHECK: fcvtzu [[UNSIG:x[0-9]+]], {{d[0-9]+}} +; CHECK: fcvtzs [[SIG:x[0-9]+]], {{d[0-9]+}} + + %res = sub i64 %signed, %unsigned +; CHECK: sub {{x[0-9]+}}, [[SIG]], [[UNSIG]] + + ret i64 %res +; CHECK: ret +} + +define float @test_i32tofloat(i32 %in) { +; CHECK: test_i32tofloat: + + %signed = sitofp i32 %in to float + %unsigned = uitofp i32 %in to float +; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{w[0-9]+}} +; CHECK: scvtf [[SIG:s[0-9]+]], {{w[0-9]+}} + + %res = fsub float %signed, %unsigned +; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]] + ret float %res +; CHECK: ret +} + +define double @test_i32todouble(i32 %in) { +; CHECK: test_i32todouble: + + %signed = sitofp i32 %in to double + %unsigned = uitofp i32 %in to double +; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{w[0-9]+}} +; CHECK: scvtf [[SIG:d[0-9]+]], {{w[0-9]+}} + + %res = fsub double %signed, %unsigned +; CHECK: fsub {{d[0-9]+}}, [[SIG]], [[UNSIG]] + ret double %res +; CHECK: ret +} + +define float @test_i64tofloat(i64 %in) { +; CHECK: test_i64tofloat: + + %signed = sitofp i64 %in to float + %unsigned = uitofp i64 %in to float +; CHECK: ucvtf [[UNSIG:s[0-9]+]], {{x[0-9]+}} +; CHECK: scvtf [[SIG:s[0-9]+]], {{x[0-9]+}} + + %res = fsub float %signed, %unsigned +; CHECK: fsub {{s[0-9]+}}, [[SIG]], [[UNSIG]] + ret float %res +;
CHECK: ret +} + +define double @test_i64todouble(i64 %in) { +; CHECK: test_i64todouble: + + %signed = sitofp i64 %in to double + %unsigned = uitofp i64 %in to double +; CHECK: ucvtf [[UNSIG:d[0-9]+]], {{x[0-9]+}} +; CHECK: scvtf [[SIG:d[0-9]+]], {{x[0-9]+}} + + %res = fsub double %signed, %unsigned +; CHECK: sub {{d[0-9]+}}, [[SIG]], [[UNSIG]] + ret double %res +; CHECK: ret +} + +define i32 @test_bitcastfloattoi32(float %in) { +; CHECK: test_bitcastfloattoi32: + + %res = bitcast float %in to i32 +; CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}} + ret i32 %res +} + +define i64 @test_bitcastdoubletoi64(double %in) { +; CHECK: test_bitcastdoubletoi64: + + %res = bitcast double %in to i64 +; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} + ret i64 %res +} + +define float @test_bitcasti32tofloat(i32 %in) { +; CHECK: test_bitcasti32tofloat: + + %res = bitcast i32 %in to float +; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}} + ret float %res + +} + +define double @test_bitcasti64todouble(i64 %in) { +; CHECK: test_bitcasti64todouble: + + %res = bitcast i64 %in to double +; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} + ret double %res + +} diff --git a/test/CodeGen/AArch64/flags-multiuse.ll b/test/CodeGen/AArch64/flags-multiuse.ll new file mode 100644 index 000000000000..940c146f0a9f --- /dev/null +++ b/test/CodeGen/AArch64/flags-multiuse.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +; LLVM should be able to cope with multiple uses of the same flag-setting +; instruction at different points of a routine. Either by rematerializing the +; compare or by saving and restoring the flag register. + +declare void @bar() + +@var = global i32 0 + +define i32 @test_multiflag(i32 %n, i32 %m, i32 %o) { +; CHECK: test_multiflag: + + %test = icmp ne i32 %n, %m +; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]] + + %val = zext i1 %test to i32 +; CHECK: csinc {{[xw][0-9]+}}, {{xzr|wzr}}, {{xzr|wzr}}, eq + + store i32 %val, i32* @var + + call void @bar() +; CHECK: bl bar + + ; Currently, the comparison is emitted again. An MSR/MRS pair would also be + ; acceptable, but assuming the call preserves NZCV is not. 
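+ ; As a rough sketch of the MSR/MRS alternative mentioned above (an
+ ; illustration only, not something the CHECK lines below ask for), the
+ ; flags could be carried across the call in a callee-saved register:
+ ;   mrs x20, NZCV      ; stash N/Z/C/V before "bl bar"
+ ;   msr NZCV, x20      ; restore them afterwards so a later b.eq is still valid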
+ br i1 %test, label %iftrue, label %iffalse +; CHECK: cmp [[LHS]], [[RHS]] +; CHECK: b.eq + +iftrue: + ret i32 42 +iffalse: + ret i32 0 +} diff --git a/test/CodeGen/AArch64/floatdp_1source.ll b/test/CodeGen/AArch64/floatdp_1source.ll new file mode 100644 index 000000000000..c94ba9b57b5a --- /dev/null +++ b/test/CodeGen/AArch64/floatdp_1source.ll @@ -0,0 +1,138 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@varhalf = global half 0.0 +@varfloat = global float 0.0 +@vardouble = global double 0.0 + +declare float @fabsf(float) readonly +declare double @fabs(double) readonly + +declare float @llvm.sqrt.f32(float %Val) +declare double @llvm.sqrt.f64(double %Val) + +declare float @ceilf(float) readonly +declare double @ceil(double) readonly + +declare float @floorf(float) readonly +declare double @floor(double) readonly + +declare float @truncf(float) readonly +declare double @trunc(double) readonly + +declare float @rintf(float) readonly +declare double @rint(double) readonly + +declare float @nearbyintf(float) readonly +declare double @nearbyint(double) readonly + +define void @simple_float() { +; CHECK: simple_float: + %val1 = load volatile float* @varfloat + + %valabs = call float @fabsf(float %val1) + store volatile float %valabs, float* @varfloat +; CHECK: fabs {{s[0-9]+}}, {{s[0-9]+}} + + %valneg = fsub float -0.0, %val1 + store volatile float %valneg, float* @varfloat +; CHECK: fneg {{s[0-9]+}}, {{s[0-9]+}} + + %valsqrt = call float @llvm.sqrt.f32(float %val1) + store volatile float %valsqrt, float* @varfloat +; CHECK: fsqrt {{s[0-9]+}}, {{s[0-9]+}} + + %valceil = call float @ceilf(float %val1) + store volatile float %valceil, float* @varfloat +; CHECK: frintp {{s[0-9]+}}, {{s[0-9]+}} + + %valfloor = call float @floorf(float %val1) + store volatile float %valfloor, float* @varfloat +; CHECK: frintm {{s[0-9]+}}, {{s[0-9]+}} + + %valtrunc = call float @truncf(float %val1) + store volatile float %valtrunc, float* @varfloat +; CHECK: frintz {{s[0-9]+}}, {{s[0-9]+}} + + %valrint = call float @rintf(float %val1) + store volatile float %valrint, float* @varfloat +; CHECK: frintx {{s[0-9]+}}, {{s[0-9]+}} + + %valnearbyint = call float @nearbyintf(float %val1) + store volatile float %valnearbyint, float* @varfloat +; CHECK: frinti {{s[0-9]+}}, {{s[0-9]+}} + + ret void +} + +define void @simple_double() { +; CHECK: simple_double: + %val1 = load volatile double* @vardouble + + %valabs = call double @fabs(double %val1) + store volatile double %valabs, double* @vardouble +; CHECK: fabs {{d[0-9]+}}, {{d[0-9]+}} + + %valneg = fsub double -0.0, %val1 + store volatile double %valneg, double* @vardouble +; CHECK: fneg {{d[0-9]+}}, {{d[0-9]+}} + + %valsqrt = call double @llvm.sqrt.f64(double %val1) + store volatile double %valsqrt, double* @vardouble +; CHECK: fsqrt {{d[0-9]+}}, {{d[0-9]+}} + + %valceil = call double @ceil(double %val1) + store volatile double %valceil, double* @vardouble +; CHECK: frintp {{d[0-9]+}}, {{d[0-9]+}} + + %valfloor = call double @floor(double %val1) + store volatile double %valfloor, double* @vardouble +; CHECK: frintm {{d[0-9]+}}, {{d[0-9]+}} + + %valtrunc = call double @trunc(double %val1) + store volatile double %valtrunc, double* @vardouble +; CHECK: frintz {{d[0-9]+}}, {{d[0-9]+}} + + %valrint = call double @rint(double %val1) + store volatile double %valrint, double* @vardouble +; CHECK: frintx {{d[0-9]+}}, {{d[0-9]+}} + + %valnearbyint = call double @nearbyint(double %val1) + store volatile double %valnearbyint, double* 
@vardouble +; CHECK: frinti {{d[0-9]+}}, {{d[0-9]+}} + + ret void +} + +define void @converts() { +; CHECK: converts: + + %val16 = load volatile half* @varhalf + %val32 = load volatile float* @varfloat + %val64 = load volatile double* @vardouble + + %val16to32 = fpext half %val16 to float + store volatile float %val16to32, float* @varfloat +; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}} + + %val16to64 = fpext half %val16 to double + store volatile double %val16to64, double* @vardouble +; CHECK: fcvt {{d[0-9]+}}, {{h[0-9]+}} + + %val32to16 = fptrunc float %val32 to half + store volatile half %val32to16, half* @varhalf +; CHECK: fcvt {{h[0-9]+}}, {{s[0-9]+}} + + %val32to64 = fpext float %val32 to double + store volatile double %val32to64, double* @vardouble +; CHECK: fcvt {{d[0-9]+}}, {{s[0-9]+}} + + %val64to16 = fptrunc double %val64 to half + store volatile half %val64to16, half* @varhalf +; CHECK: fcvt {{h[0-9]+}}, {{d[0-9]+}} + + %val64to32 = fptrunc double %val64 to float + store volatile float %val64to32, float* @varfloat +; CHECK: fcvt {{s[0-9]+}}, {{d[0-9]+}} + + ret void +} diff --git a/test/CodeGen/AArch64/floatdp_2source.ll b/test/CodeGen/AArch64/floatdp_2source.ll new file mode 100644 index 000000000000..b2256b342acf --- /dev/null +++ b/test/CodeGen/AArch64/floatdp_2source.ll @@ -0,0 +1,60 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@varfloat = global float 0.0 +@vardouble = global double 0.0 + +define void @testfloat() { +; CHECK: testfloat: + %val1 = load float* @varfloat + + %val2 = fadd float %val1, %val1 +; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + + %val3 = fmul float %val2, %val1 +; CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + + %val4 = fdiv float %val3, %val1 +; CHECK: fdiv {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + + %val5 = fsub float %val4, %val2 +; CHECK: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + + store volatile float %val5, float* @varfloat + +; These will be enabled with the implementation of floating-point litpool entries. + %val6 = fmul float %val1, %val2 + %val7 = fsub float -0.0, %val6 +; CHECK: fnmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + + store volatile float %val7, float* @varfloat + + ret void +} + +define void @testdouble() { +; CHECK: testdouble: + %val1 = load double* @vardouble + + %val2 = fadd double %val1, %val1 +; CHECK: fadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + + %val3 = fmul double %val2, %val1 +; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + + %val4 = fdiv double %val3, %val1 +; CHECK: fdiv {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + + %val5 = fsub double %val4, %val2 +; CHECK: fsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + + store volatile double %val5, double* @vardouble + +; These will be enabled with the implementation of floating-point litpool entries.
+ %val6 = fmul double %val1, %val2 + %val7 = fsub double -0.0, %val6 +; CHECK: fnmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + + store volatile double %val7, double* @vardouble + + ret void +} diff --git a/test/CodeGen/AArch64/fp-cond-sel.ll b/test/CodeGen/AArch64/fp-cond-sel.ll new file mode 100644 index 000000000000..56e8f16f9b36 --- /dev/null +++ b/test/CodeGen/AArch64/fp-cond-sel.ll @@ -0,0 +1,26 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@varfloat = global float 0.0 +@vardouble = global double 0.0 + +define void @test_csel(i32 %lhs32, i32 %rhs32, i64 %lhs64) { +; CHECK: test_csel: + + %tst1 = icmp ugt i32 %lhs32, %rhs32 + %val1 = select i1 %tst1, float 0.0, float 1.0 + store float %val1, float* @varfloat +; CHECK: ldr [[FLT0:s[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI +; CHECK: fmov [[FLT1:s[0-9]+]], #1.0 +; CHECK: fcsel {{s[0-9]+}}, [[FLT0]], [[FLT1]], hi + + %rhs64 = sext i32 %rhs32 to i64 + %tst2 = icmp sle i64 %lhs64, %rhs64 + %val2 = select i1 %tst2, double 1.0, double 0.0 + store double %val2, double* @vardouble +; CHECK: ldr [[FLT0:d[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI +; CHECK: fmov [[FLT1:d[0-9]+]], #1.0 +; CHECK: fcsel {{d[0-9]+}}, [[FLT1]], [[FLT0]], le + + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/fp-dp3.ll b/test/CodeGen/AArch64/fp-dp3.ll new file mode 100644 index 000000000000..39db9be15771 --- /dev/null +++ b/test/CodeGen/AArch64/fp-dp3.ll @@ -0,0 +1,102 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s + +declare float @llvm.fma.f32(float, float, float) +declare double @llvm.fma.f64(double, double, double) + +define float @test_fmadd(float %a, float %b, float %c) { +; CHECK: test_fmadd: + %val = call float @llvm.fma.f32(float %a, float %b, float %c) +; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %val +} + +define float @test_fmsub(float %a, float %b, float %c) { +; CHECK: test_fmsub: + %nega = fsub float -0.0, %a + %val = call float @llvm.fma.f32(float %nega, float %b, float %c) +; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %val +} + +define float @test_fnmadd(float %a, float %b, float %c) { +; CHECK: test_fnmadd: + %negc = fsub float -0.0, %c + %val = call float @llvm.fma.f32(float %a, float %b, float %negc) +; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %val +} + +define float @test_fnmsub(float %a, float %b, float %c) { +; CHECK: test_fnmsub: + %nega = fsub float -0.0, %a + %negc = fsub float -0.0, %c + %val = call float @llvm.fma.f32(float %nega, float %b, float %negc) +; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %val +} + +define double @testd_fmadd(double %a, double %b, double %c) { +; CHECK: testd_fmadd: + %val = call double @llvm.fma.f64(double %a, double %b, double %c) +; CHECK: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + ret double %val +} + +define double @testd_fmsub(double %a, double %b, double %c) { +; CHECK: testd_fmsub: + %nega = fsub double -0.0, %a + %val = call double @llvm.fma.f64(double %nega, double %b, double %c) +; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + ret double %val +} + +define double @testd_fnmadd(double %a, double %b, double %c) { +; CHECK: testd_fnmadd: + %negc = fsub double -0.0, %c + %val = call double @llvm.fma.f64(double %a, double %b, double %negc) +; CHECK: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + ret double %val +} + +define 
double @testd_fnmsub(double %a, double %b, double %c) { +; CHECK: testd_fnmsub: + %nega = fsub double -0.0, %a + %negc = fsub double -0.0, %c + %val = call double @llvm.fma.f64(double %nega, double %b, double %negc) +; CHECK: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} + ret double %val +} + +define float @test_fmadd_unfused(float %a, float %b, float %c) { +; CHECK: test_fmadd_unfused: + %prod = fmul float %b, %c + %sum = fadd float %a, %prod +; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %sum +} + +define float @test_fmsub_unfused(float %a, float %b, float %c) { +; CHECK: test_fmsub_unfused: + %prod = fmul float %b, %c + %diff = fsub float %a, %prod +; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %diff +} + +define float @test_fnmadd_unfused(float %a, float %b, float %c) { +; CHECK: test_fnmadd_unfused: + %nega = fsub float -0.0, %a + %prod = fmul float %b, %c + %sum = fadd float %nega, %prod +; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %sum +} + +define float @test_fnmsub_unfused(float %a, float %b, float %c) { +; CHECK: test_fnmsub_unfused: + %nega = fsub float -0.0, %a + %prod = fmul float %b, %c + %diff = fsub float %nega, %prod +; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}} + ret float %diff +} diff --git a/test/CodeGen/AArch64/fp128-folding.ll b/test/CodeGen/AArch64/fp128-folding.ll new file mode 100644 index 000000000000..b5bdcf4f37b4 --- /dev/null +++ b/test/CodeGen/AArch64/fp128-folding.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +declare void @bar(i8*, i8*, i32*) + +; SelectionDAG used to try to fold some fp128 operations using the ppc128 type, +; which is not supported. + +define fp128 @test_folding() { +; CHECK: test_folding: + %l = alloca i32 + store i32 42, i32* %l + %val = load i32* %l + %fpval = sitofp i32 %val to fp128 + ; If the value is loaded from a constant pool into an fp128, it's been folded + ; successfully. +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI + ret fp128 %fpval +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/fp128.ll b/test/CodeGen/AArch64/fp128.ll new file mode 100644 index 000000000000..258d34b8f81f --- /dev/null +++ b/test/CodeGen/AArch64/fp128.ll @@ -0,0 +1,280 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +@lhs = global fp128 zeroinitializer +@rhs = global fp128 zeroinitializer + +define fp128 @test_add() { +; CHECK: test_add: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + + %val = fadd fp128 %lhs, %rhs +; CHECK: bl __addtf3 + ret fp128 %val +} + +define fp128 @test_sub() { +; CHECK: test_sub: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + + %val = fsub fp128 %lhs, %rhs +; CHECK: bl __subtf3 + ret fp128 %val +} + +define fp128 @test_mul() { +; CHECK: test_mul: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + + %val = fmul fp128 %lhs, %rhs +; CHECK: bl __multf3 + ret fp128 %val +} + +define fp128 @test_div() { +; CHECK: test_div: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + + %val = fdiv fp128 %lhs, %rhs +; CHECK: bl __divtf3 + ret fp128 %val +} + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_fptosi() { +; CHECK: test_fptosi: + %val = load fp128* @lhs + + %val32 = fptosi fp128 %val to i32 + store i32 %val32, i32* @var32 +; CHECK: bl __fixtfsi + + %val64 = fptosi fp128 %val to i64 + store i64 %val64, i64* @var64 +; CHECK: bl __fixtfdi + + ret void +} + +define void @test_fptoui() { +; CHECK: test_fptoui: + %val = load fp128* @lhs + + %val32 = fptoui fp128 %val to i32 + store i32 %val32, i32* @var32 +; CHECK: bl __fixunstfsi + + %val64 = fptoui fp128 %val to i64 + store i64 %val64, i64* @var64 +; CHECK: bl __fixunstfdi + + ret void +} + +define void @test_sitofp() { +; CHECK: test_sitofp: + + %src32 = load i32* @var32 + %val32 = sitofp i32 %src32 to fp128 + store volatile fp128 %val32, fp128* @lhs +; CHECK: bl __floatsitf + + %src64 = load i64* @var64 + %val64 = sitofp i64 %src64 to fp128 + store volatile fp128 %val64, fp128* @lhs +; CHECK: bl __floatditf + + ret void +} + +define void @test_uitofp() { +; CHECK: test_uitofp: + + %src32 = load i32* @var32 + %val32 = uitofp i32 %src32 to fp128 + store volatile fp128 %val32, fp128* @lhs +; CHECK: bl __floatunsitf + + %src64 = load i64* @var64 + %val64 = uitofp i64 %src64 to fp128 + store volatile fp128 %val64, fp128* @lhs +; CHECK: bl __floatunditf + + ret void +} + +define i1 @test_setcc1() { +; CHECK: test_setcc1: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + +; Technically, everything after the call to __letf2 is redundant, but we'll let +; LLVM have its fun for now. + %val = fcmp ole fp128 %lhs, %rhs +; CHECK: bl __letf2 +; CHECK: cmp w0, #0 +; CHECK: csinc w0, wzr, wzr, gt + + ret i1 %val +; CHECK: ret +} + +define i1 @test_setcc2() { +; CHECK: test_setcc2: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + +; Technically, everything after the call to __letf2 is redundant, but we'll let +; LLVM have its fun for now. 
+ %val = fcmp ugt fp128 %lhs, %rhs +; CHECK: bl __unordtf2 +; CHECK: mov x[[UNORDERED:[0-9]+]], x0 + +; CHECK: bl __gttf2 +; CHECK: cmp w0, #0 +; CHECK: csinc [[GT:w[0-9]+]], wzr, wzr, le +; CHECK: cmp w[[UNORDERED]], #0 +; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq +; CHECK: orr w0, [[UNORDERED]], [[GT]] + + ret i1 %val +; CHECK: ret +} + +define i32 @test_br_cc() { +; CHECK: test_br_cc: + + %lhs = load fp128* @lhs + %rhs = load fp128* @rhs +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:lhs] +; CHECK: ldr q1, [{{x[0-9]+}}, #:lo12:rhs] + + ; olt == !uge, which LLVM unfortunately "optimizes" this to. + %cond = fcmp olt fp128 %lhs, %rhs +; CHECK: bl __unordtf2 +; CHECK: mov x[[UNORDERED:[0-9]+]], x0 + +; CHECK: bl __getf2 +; CHECK: cmp w0, #0 + +; CHECK: csinc [[OGE:w[0-9]+]], wzr, wzr, lt +; CHECK: cmp w[[UNORDERED]], #0 +; CHECK: csinc [[UNORDERED:w[0-9]+]], wzr, wzr, eq +; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]] +; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]] + br i1 %cond, label %iftrue, label %iffalse + +iftrue: + ret i32 42 +; CHECK-NEXT: BB# +; CHECK-NEXT: movz x0, #42 +; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]] + +iffalse: + ret i32 29 +; CHECK: [[RET29]]: +; CHECK-NEXT: movz x0, #29 +; CHECK-NEXT: [[REALRET]]: +; CHECK: ret +} + +define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) { +; CHECK: test_select: + + %val = select i1 %cond, fp128 %lhs, fp128 %rhs + store fp128 %val, fp128* @lhs +; CHECK: cmp w0, #0 +; CHECK: str q1, [sp] +; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: BB# +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: [[IFFALSE]]: +; CHECK-NEXT: ldr q0, [sp] +; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs] + ret void +; CHECK: ret +} + +@varfloat = global float 0.0 +@vardouble = global double 0.0 + +define void @test_round() { +; CHECK: test_round: + + %val = load fp128* @lhs + + %float = fptrunc fp128 %val to float + store float %float, float* @varfloat +; CHECK: bl __trunctfsf2 +; CHECK: str s0, [{{x[0-9]+}}, #:lo12:varfloat] + + %double = fptrunc fp128 %val to double + store double %double, double* @vardouble +; CHECK: bl __trunctfdf2 +; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble] + + ret void +} + +define void @test_extend() { +; CHECK: test_extend: + + %val = load fp128* @lhs + + %float = load float* @varfloat + %fromfloat = fpext float %float to fp128 + store volatile fp128 %fromfloat, fp128* @lhs +; CHECK: bl __extendsftf2 +; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs] + + %double = load double* @vardouble + %fromdouble = fpext double %double to fp128 + store volatile fp128 %fromdouble, fp128* @lhs +; CHECK: bl __extenddftf2 +; CHECK: str q0, [{{x[0-9]+}}, #:lo12:lhs] + + ret void +; CHECK: ret +} + +define fp128 @test_neg(fp128 %in) { +; CHECK: [[MINUS0:.LCPI[0-9]+_0]]: +; Make sure the weird hex constant below *is* -0.0 +; CHECK-NEXT: fp128 -0 + +; CHECK: test_neg: + + ; Could in principle be optimized to fneg which we can't select, this makes + ; sure that doesn't happen. 
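+ ; As a rough illustration of that missed optimization (not something this
+ ; test checks): negating an fp128 only flips the sign bit (bit 127), so with
+ ; a suitable constant mask it could in principle be a single vector EOR such
+ ; as "eor v0.16b, v0.16b, v1.16b" instead of the __subtf3 libcall expected
+ ; below.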
+ %ret = fsub fp128 0xL00000000000000008000000000000000, %in +; CHECK: str q0, [sp, #-16] +; CHECK-NEXT: ldr q1, [sp], #16 +; CHECK: ldr q0, [{{x[0-9]+}}, #:lo12:[[MINUS0]]] +; CHECK: bl __subtf3 + + ret fp128 %ret +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll new file mode 100644 index 000000000000..fd28aeef9291 --- /dev/null +++ b/test/CodeGen/AArch64/fpimm.ll @@ -0,0 +1,34 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@varf32 = global float 0.0 +@varf64 = global double 0.0 + +define void @check_float() { +; CHECK: check_float: + + %val = load float* @varf32 + %newval1 = fadd float %val, 8.5 + store volatile float %newval1, float* @varf32 +; CHECK: fmov {{s[0-9]+}}, #8.5 + + %newval2 = fadd float %val, 128.0 + store volatile float %newval2, float* @varf32 +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI0_0 + + ret void +} + +define void @check_double() { +; CHECK: check_double: + + %val = load double* @varf64 + %newval1 = fadd double %val, 8.5 + store volatile double %newval1, double* @varf64 +; CHECK: fmov {{d[0-9]+}}, #8.5 + + %newval2 = fadd double %val, 128.0 + store volatile double %newval2, double* @varf64 +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.LCPI1_0 + + ret void +} diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll new file mode 100644 index 000000000000..78fde6a3c33a --- /dev/null +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -0,0 +1,193 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +%myStruct = type { i64 , i8, i32 } + +@var8 = global i8 0 +@var32 = global i32 0 +@var64 = global i64 0 +@var128 = global i128 0 +@varfloat = global float 0.0 +@vardouble = global double 0.0 +@varstruct = global %myStruct zeroinitializer + +define void @take_i8s(i8 %val1, i8 %val2) { +; CHECK: take_i8s: + store i8 %val2, i8* @var8 + ; Not using w1 may be technically allowed, but it would indicate a + ; problem in itself. +; CHECK: strb w1, [{{x[0-9]+}}, #:lo12:var8] + ret void +} + +define void @add_floats(float %val1, float %val2) { +; CHECK: add_floats: + %newval = fadd float %val1, %val2 +; CHECK: fadd [[ADDRES:s[0-9]+]], s0, s1 + store float %newval, float* @varfloat +; CHECK: str [[ADDRES]], [{{x[0-9]+}}, #:lo12:varfloat] + ret void +} + +; byval pointers should be allocated to the stack and copied as if +; with memcpy. 
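+; As a rough sketch of what that means at a call site (an assumed shape for
+; illustration; this file only checks the callee side), the caller makes its
+; own stack copy and passes a pointer to it, along the lines of:
+;   %copy = alloca %myStruct
+;   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %copy.i8, i8* %orig.i8, i32 16, i32 8, i1 false)
+;   call void @take_struct(%myStruct* byval %copy)
+; where %copy.i8 and %orig.i8 stand for bitcasts of the struct pointers.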
+define void @take_struct(%myStruct* byval %structval) { +; CHECK: take_struct: + %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2 + %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0 + + %val0 = load i32* %addr0 + ; Some weird move means x0 is used for one access +; CHECK: ldr [[REG32:w[0-9]+]], [{{x[0-9]+|sp}}, #12] + store i32 %val0, i32* @var32 +; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32] + + %val1 = load i64* %addr1 +; CHECK: ldr [[REG64:x[0-9]+]], [{{x[0-9]+|sp}}] + store i64 %val1, i64* @var64 +; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64] + + ret void +} + +; %structval should be at sp + 16 +define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %structval) { +; CHECK: check_byval_align: + + %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2 + %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0 + + %val0 = load i32* %addr0 + ; Some weird move means x0 is used for one access +; CHECK: add x[[STRUCTVAL_ADDR:[0-9]+]], sp, #16 +; CHECK: ldr [[REG32:w[0-9]+]], [x[[STRUCTVAL_ADDR]], #12] + store i32 %val0, i32* @var32 +; CHECK: str [[REG32]], [{{x[0-9]+}}, #:lo12:var32] + + %val1 = load i64* %addr1 +; CHECK: ldr [[REG64:x[0-9]+]], [sp, #16] + store i64 %val1, i64* @var64 +; CHECK: str [[REG64]], [{{x[0-9]+}}, #:lo12:var64] + + ret void +} + +define i32 @return_int() { +; CHECK: return_int: + %val = load i32* @var32 + ret i32 %val +; CHECK: ldr w0, [{{x[0-9]+}}, #:lo12:var32] + ; Make sure epilogue follows +; CHECK-NEXT: ret +} + +define double @return_double() { +; CHECK: return_double: + ret double 3.14 +; CHECK: ldr d0, [{{x[0-9]+}}, #:lo12:.LCPI +} + +; This is the kind of IR clang will produce for returning a struct +; small enough to go into registers. Not all that pretty, but it +; works. +define [2 x i64] @return_struct() { +; CHECK: return_struct: + %addr = bitcast %myStruct* @varstruct to [2 x i64]* + %val = load [2 x i64]* %addr + ret [2 x i64] %val +; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:varstruct] + ; Odd register regex below disallows x0 which we want to be live now. +; CHECK: add {{x[1-9][0-9]*}}, {{x[1-9][0-9]*}}, #:lo12:varstruct +; CHECK-NEXT: ldr x1, [{{x[1-9][0-9]*}}, #8] + ; Make sure epilogue immediately follows +; CHECK-NEXT: ret +} + +; Large structs are passed by reference (storage allocated by caller +; to preserve value semantics) in x8. Strictly this only applies to +; structs larger than 16 bytes, but C semantics can still be provided +; if LLVM does it to %myStruct too. So this is the simplest check +define void @return_large_struct(%myStruct* sret %retval) { +; CHECK: return_large_struct: + %addr0 = getelementptr %myStruct* %retval, i64 0, i32 0 + %addr1 = getelementptr %myStruct* %retval, i64 0, i32 1 + %addr2 = getelementptr %myStruct* %retval, i64 0, i32 2 + + store i64 42, i64* %addr0 + store i8 2, i8* %addr1 + store i32 9, i32* %addr2 +; CHECK: str {{x[0-9]+}}, [x8] +; CHECK: strb {{w[0-9]+}}, [x8, #8] +; CHECK: str {{w[0-9]+}}, [x8, #12] + + ret void +} + +; This struct is just too far along to go into registers: (only x7 is +; available, but it needs two). Also make sure that %stacked doesn't +; sneak into x7 behind. 
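+; Roughly, under the AAPCS64 assignment this test assumes: %var0-%var2 take
+; w0-w2, %var3 takes x3, the i128 %var45 takes the even/odd pair x4/x5 and
+; %var6 takes x6, leaving only x7. That is one register short of the two the
+; 16-byte byval struct needs, so the struct (and %stacked after it) goes on
+; the stack, while the floating-point %notstacked still arrives in d0.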
+define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var45, + i32* %var6, %myStruct* byval %struct, i32* byval %stacked, + double %notstacked) { +; CHECK: struct_on_stack: + %addr = getelementptr %myStruct* %struct, i64 0, i32 0 + %val64 = load i64* %addr + store i64 %val64, i64* @var64 + ; Currently nothing on local stack, so struct should be at sp +; CHECK: ldr [[VAL64:x[0-9]+]], [sp] +; CHECK: str [[VAL64]], [{{x[0-9]+}}, #:lo12:var64] + + store double %notstacked, double* @vardouble +; CHECK-NOT: ldr d0 +; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble + + %retval = load i32* %stacked + ret i32 %retval +; CHECK: ldr w0, [sp, #16] +} + +define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, + float %var4, float %var5, float %var6, float %var7, + float %var8) { +; CHECK: stacked_fpu: + store float %var8, float* @varfloat + ; Beware as above: the offset would be different on big-endian + ; machines if the first ldr were changed to use s-registers. +; CHECK: ldr d[[VALFLOAT:[0-9]+]], [sp] +; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, #:lo12:varfloat] + + ret void +} + +; 128-bit integer types should be passed in xEVEN, xODD rather than +; the reverse. In this case x2 and x3. Nothing should use x1. +define i32 @check_i128_regalign(i32 %val0, i128 %val1, i32 %val2) { +; CHECK: check_i128_regalign + store i128 %val1, i128* @var128 +; CHECK: str x2, [{{x[0-9]+}}, #:lo12:var128] +; CHECK: str x3, [{{x[0-9]+}}, #8] + + ret i32 %val2 +; CHECK: mov x0, x4 +} + +define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, + i32 %val4, i32 %val5, i32 %val6, i32 %val7, + i32 %stack1, i128 %stack2) { +; CHECK: check_i128_stackalign + store i128 %stack2, i128* @var128 + ; Nothing local on stack in current codegen, so first stack is 16 away +; CHECK: ldr {{x[0-9]+}}, [sp, #16] + ; Important point is that we address sp+24 for second dword +; CHECK: add [[REG:x[0-9]+]], sp, #16 +; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG]], #8] + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) + +define i32 @test_extern() { +; CHECK: test_extern: + call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0) +; CHECK: bl memcpy + ret i32 0 +} diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll new file mode 100644 index 000000000000..13b689c40886 --- /dev/null +++ b/test/CodeGen/AArch64/func-calls.ll @@ -0,0 +1,140 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +%myStruct = type { i64 , i8, i32 } + +@var8 = global i8 0 +@var8_2 = global i8 0 +@var32 = global i32 0 +@var64 = global i64 0 +@var128 = global i128 0 +@varfloat = global float 0.0 +@varfloat_2 = global float 0.0 +@vardouble = global double 0.0 +@varstruct = global %myStruct zeroinitializer +@varsmallstruct = global [2 x i64] zeroinitializer + +declare void @take_i8s(i8 %val1, i8 %val2) +declare void @take_floats(float %val1, float %val2) + +define void @simple_args() { +; CHECK: simple_args: + %char1 = load i8* @var8 + %char2 = load i8* @var8_2 + call void @take_i8s(i8 %char1, i8 %char2) +; CHECK: ldrb w0, [{{x[0-9]+}}, #:lo12:var8] +; CHECK: ldrb w1, [{{x[0-9]+}}, #:lo12:var8_2] +; CHECK: bl take_i8s + + %float1 = load float* @varfloat + %float2 = load float* @varfloat_2 + call void @take_floats(float %float1, float %float2) +; CHECK: ldr s1, [{{x[0-9]+}}, #:lo12:varfloat_2] +; CHECK: ldr s0, [{{x[0-9]+}}, #:lo12:varfloat] +; CHECK: bl take_floats + + ret void +} + +declare i32 
@return_int() +declare double @return_double() +declare [2 x i64] @return_smallstruct() +declare void @return_large_struct(%myStruct* sret %retval) + +define void @simple_rets() { +; CHECK: simple_rets: + + %int = call i32 @return_int() + store i32 %int, i32* @var32 +; CHECK: bl return_int +; CHECK: str w0, [{{x[0-9]+}}, #:lo12:var32] + + %dbl = call double @return_double() + store double %dbl, double* @vardouble +; CHECK: bl return_double +; CHECK: str d0, [{{x[0-9]+}}, #:lo12:vardouble] + + %arr = call [2 x i64] @return_smallstruct() + store [2 x i64] %arr, [2 x i64]* @varsmallstruct +; CHECK: bl return_smallstruct +; CHECK: str x1, [{{x[0-9]+}}, #8] +; CHECK: str x0, [{{x[0-9]+}}, #:lo12:varsmallstruct] + + call void @return_large_struct(%myStruct* sret @varstruct) +; CHECK: add x8, {{x[0-9]+}}, #:lo12:varstruct +; CHECK: bl return_large_struct + + ret void +} + + +declare i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var45, + i32* %var6, %myStruct* byval %struct, i32 %stacked, + double %notstacked) +declare void @stacked_fpu(float %var0, double %var1, float %var2, float %var3, + float %var4, float %var5, float %var6, float %var7, + float %var8) + +define void @check_stack_args() { + call i32 @struct_on_stack(i8 0, i16 12, i32 42, i64 99, i128 1, + i32* @var32, %myStruct* byval @varstruct, + i32 999, double 1.0) + ; Want to check that the final double is passed in registers and + ; that varstruct is passed on the stack. Rather dependent on how a + ; memcpy gets created, but the following works for now. +; CHECK: mov x0, sp +; CHECK: str {{w[0-9]+}}, [x0] +; CHECK: str {{w[0-9]+}}, [x0, #12] +; CHECK: fmov d0, +; CHECK: bl struct_on_stack + + call void @stacked_fpu(float -1.0, double 1.0, float 4.0, float 2.0, + float -2.0, float -8.0, float 16.0, float 1.0, + float 64.0) +; CHECK: ldr s[[STACKEDREG:[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI +; CHECK: mov x0, sp +; CHECK: str d[[STACKEDREG]], [x0] +; CHECK: bl stacked_fpu + ret void +} + + +declare void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, + i32 %val4, i32 %val5, i32 %val6, i32 %val7, + i32 %stack1, i128 %stack2) + +declare void @check_i128_regalign(i32 %val0, i128 %val1) + + +define void @check_i128_align() { +; CHECK: check_i128_align: + %val = load i128* @var128 + call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3, + i32 4, i32 5, i32 6, i32 7, + i32 42, i128 %val) +; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var128] +; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8] +; CHECK: mov x[[SPREG:[0-9]+]], sp +; CHECK: str [[I128HI]], [x[[SPREG]], #24] +; CHECK: str [[I128LO]], [x[[SPREG]], #16] +; CHECK: bl check_i128_stackalign + + call void @check_i128_regalign(i32 0, i128 42) +; CHECK-NOT: mov x1 +; CHECK: movz x2, #42 +; CHECK: mov x3, xzr +; CHECK: bl check_i128_regalign + + ret void +} + +@fptr = global void()* null + +define void @check_indirect_call() { +; CHECK: check_indirect_call: + %func = load void()** @fptr + call void %func() +; CHECK: ldr [[FPTR:x[0-9]+]], [{{x[0-9]+}}, #:lo12:fptr] +; CHECK: blr [[FPTR]] + + ret void +} diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll new file mode 100644 index 000000000000..8ed6e551cdeb --- /dev/null +++ b/test/CodeGen/AArch64/global-alignment.ll @@ -0,0 +1,69 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +@var32 = global [3 x i32] zeroinitializer +@var64 = global [3 x i64] zeroinitializer +@var32_align64 = global [3 x i32] 
zeroinitializer, align 8 + +define i64 @test_align32() { +; CHECK: test_align32: + %addr = bitcast [3 x i32]* @var32 to i64* + + ; Since @var32 is only guaranteed to be aligned to 32-bits, it's invalid to + ; emit an "LDR x0, [x0, #:lo12:var32]" instruction to implement this load. + %val = load i64* %addr +; CHECK: adrp [[HIBITS:x[0-9]+]], var32 +; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], #:lo12:var32 +; CHECK: ldr x0, [x[[ADDR]]] + + ret i64 %val +} + +define i64 @test_align64() { +; CHECK: test_align64: + %addr = bitcast [3 x i64]* @var64 to i64* + + ; However, var64 *is* properly aligned and emitting an adrp/add/ldr would be + ; inefficient. + %val = load i64* %addr +; CHECK: adrp x[[HIBITS:[0-9]+]], var64 +; CHECK-NOT: add x[[HIBITS]] +; CHECK: ldr x0, [x[[HIBITS]], #:lo12:var64] + + ret i64 %val +} + +define i64 @test_var32_align64() { +; CHECK: test_var32_align64: + %addr = bitcast [3 x i32]* @var32_align64 to i64* + + ; In contrast, @var32_align64 is explicitly given 8-byte alignment, so folding + ; the #:lo12: offset straight into the load is fine here. + %val = load i64* %addr +; CHECK: adrp x[[HIBITS:[0-9]+]], var32_align64 +; CHECK-NOT: add x[[HIBITS]] +; CHECK: ldr x0, [x[[HIBITS]], #:lo12:var32_align64] + + ret i64 %val +} + +@yet_another_var = external global {i32, i32} + +define i64 @test_yet_another_var() { +; CHECK: test_yet_another_var: + + ; @yet_another_var has a preferred alignment of 8, but that's not enough if + ; we're going to be linking against other things. Its ABI alignment is only 4 + ; so we can't fold the load. + %val = load i64* bitcast({i32, i32}* @yet_another_var to i64*) +; CHECK: adrp [[HIBITS:x[0-9]+]], yet_another_var +; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], #:lo12:yet_another_var +; CHECK: ldr x0, [x[[ADDR]]] + ret i64 %val +} + +define i64()* @test_functions() { +; CHECK: test_functions: + ret i64()* @test_yet_another_var +; CHECK: adrp [[HIBITS:x[0-9]+]], test_yet_another_var +; CHECK: add x0, [[HIBITS]], #:lo12:test_yet_another_var +} diff --git a/test/CodeGen/AArch64/got-abuse.ll b/test/CodeGen/AArch64/got-abuse.ll new file mode 100644 index 000000000000..c474e5845a64 --- /dev/null +++ b/test/CodeGen/AArch64/got-abuse.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s + +; LLVM gives well-defined semantics to this horrible construct (though C says +; it's undefined). Regardless, we shouldn't crash. The important feature here is +; that in general the only way to access a GOT symbol is via a 64-bit +; load.
Neither of these alternatives has the ELF relocations required to +; support it: +; + ldr wD, [xN, #:got_lo12:func] +; + add xD, xN, #:got_lo12:func + +declare void @consume(i32) +declare void @func() + +define void @foo() nounwind { +; CHECK: foo: +entry: + call void @consume(i32 ptrtoint (void ()* @func to i32)) +; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:func +; CHECK: ldr {{x[0-9]+}}, [x[[ADDRHI]], #:got_lo12:func] + ret void +} + diff --git a/test/CodeGen/AArch64/i128-align.ll b/test/CodeGen/AArch64/i128-align.ll new file mode 100644 index 000000000000..f019ea0a6706 --- /dev/null +++ b/test/CodeGen/AArch64/i128-align.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +%struct = type { i32, i128, i8 } + +@var = global %struct zeroinitializer + +define i64 @check_size() { +; CHECK: check_size: + %starti = ptrtoint %struct* @var to i64 + + %endp = getelementptr %struct* @var, i64 1 + %endi = ptrtoint %struct* %endp to i64 + + %diff = sub i64 %endi, %starti + ret i64 %diff +; CHECK: movz x0, #48 +} + +define i64 @check_field() { +; CHECK: check_field: + %starti = ptrtoint %struct* @var to i64 + + %endp = getelementptr %struct* @var, i64 0, i32 1 + %endi = ptrtoint i128* %endp to i64 + + %diff = sub i64 %endi, %starti + ret i64 %diff +; CHECK: movz x0, #16 +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/illegal-float-ops.ll b/test/CodeGen/AArch64/illegal-float-ops.ll new file mode 100644 index 000000000000..446151b8ffac --- /dev/null +++ b/test/CodeGen/AArch64/illegal-float-ops.ll @@ -0,0 +1,221 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +@varfloat = global float 0.0 +@vardouble = global double 0.0 +@varfp128 = global fp128 zeroinitializer + +declare float @llvm.cos.f32(float) +declare double @llvm.cos.f64(double) +declare fp128 @llvm.cos.f128(fp128) + +define void @test_cos(float %float, double %double, fp128 %fp128) { +; CHECK: test_cos: + + %cosfloat = call float @llvm.cos.f32(float %float) + store float %cosfloat, float* @varfloat +; CHECK: bl cosf + + %cosdouble = call double @llvm.cos.f64(double %double) + store double %cosdouble, double* @vardouble +; CHECK: bl cos + + %cosfp128 = call fp128 @llvm.cos.f128(fp128 %fp128) + store fp128 %cosfp128, fp128* @varfp128 +; CHECK: bl cosl + + ret void +} + +declare float @llvm.exp.f32(float) +declare double @llvm.exp.f64(double) +declare fp128 @llvm.exp.f128(fp128) + +define void @test_exp(float %float, double %double, fp128 %fp128) { +; CHECK: test_exp: + + %expfloat = call float @llvm.exp.f32(float %float) + store float %expfloat, float* @varfloat +; CHECK: bl expf + + %expdouble = call double @llvm.exp.f64(double %double) + store double %expdouble, double* @vardouble +; CHECK: bl exp + + %expfp128 = call fp128 @llvm.exp.f128(fp128 %fp128) + store fp128 %expfp128, fp128* @varfp128 +; CHECK: bl expl + + ret void +} + +declare float @llvm.exp2.f32(float) +declare double @llvm.exp2.f64(double) +declare fp128 @llvm.exp2.f128(fp128) + +define void @test_exp2(float %float, double %double, fp128 %fp128) { +; CHECK: test_exp2: + + %exp2float = call float @llvm.exp2.f32(float %float) + store float %exp2float, float* @varfloat +; CHECK: bl exp2f + + %exp2double = call double @llvm.exp2.f64(double %double) + store double %exp2double, double* @vardouble +; CHECK: bl exp2 + + %exp2fp128 = call fp128 @llvm.exp2.f128(fp128 %fp128) + store fp128 %exp2fp128, fp128* @varfp128 +; CHECK: bl exp2l + ret void + +} + +declare float @llvm.log.f32(float) +declare double @llvm.log.f64(double) +declare fp128 @llvm.log.f128(fp128) + +define void @test_log(float %float, double %double, fp128 %fp128) { +; CHECK: test_log: + + %logfloat = call float @llvm.log.f32(float %float) + store float %logfloat, float* @varfloat +; CHECK: bl logf + + %logdouble = call double @llvm.log.f64(double %double) + store double %logdouble, double* @vardouble +; CHECK: bl log + + %logfp128 = call fp128 @llvm.log.f128(fp128 %fp128) + store fp128 %logfp128, fp128* @varfp128 +; CHECK: bl logl + + ret void +} + +declare float @llvm.log2.f32(float) +declare double @llvm.log2.f64(double) +declare fp128 @llvm.log2.f128(fp128) + +define void @test_log2(float %float, double %double, fp128 %fp128) { +; CHECK: test_log2: + + %log2float = call float @llvm.log2.f32(float %float) + store float %log2float, float* @varfloat +; CHECK: bl log2f + + %log2double = call double @llvm.log2.f64(double %double) + store double %log2double, double* @vardouble +; CHECK: bl log2 + + %log2fp128 = call fp128 @llvm.log2.f128(fp128 %fp128) + store fp128 %log2fp128, fp128* @varfp128 +; CHECK: bl log2l + ret void + +} + +declare float @llvm.log10.f32(float) +declare double @llvm.log10.f64(double) +declare fp128 @llvm.log10.f128(fp128) + +define void @test_log10(float %float, double %double, fp128 %fp128) { +; CHECK: 
test_log10: + + %log10float = call float @llvm.log10.f32(float %float) + store float %log10float, float* @varfloat +; CHECK: bl log10f + + %log10double = call double @llvm.log10.f64(double %double) + store double %log10double, double* @vardouble +; CHECK: bl log10 + + %log10fp128 = call fp128 @llvm.log10.f128(fp128 %fp128) + store fp128 %log10fp128, fp128* @varfp128 +; CHECK: bl log10l + + ret void +} + +declare float @llvm.sin.f32(float) +declare double @llvm.sin.f64(double) +declare fp128 @llvm.sin.f128(fp128) + +define void @test_sin(float %float, double %double, fp128 %fp128) { +; CHECK: test_sin: + + %sinfloat = call float @llvm.sin.f32(float %float) + store float %sinfloat, float* @varfloat +; CHECK: bl sinf + + %sindouble = call double @llvm.sin.f64(double %double) + store double %sindouble, double* @vardouble +; CHECK: bl sin + + %sinfp128 = call fp128 @llvm.sin.f128(fp128 %fp128) + store fp128 %sinfp128, fp128* @varfp128 +; CHECK: bl sinl + ret void + +} + +declare float @llvm.pow.f32(float, float) +declare double @llvm.pow.f64(double, double) +declare fp128 @llvm.pow.f128(fp128, fp128) + +define void @test_pow(float %float, double %double, fp128 %fp128) { +; CHECK: test_pow: + + %powfloat = call float @llvm.pow.f32(float %float, float %float) + store float %powfloat, float* @varfloat +; CHECK: bl powf + + %powdouble = call double @llvm.pow.f64(double %double, double %double) + store double %powdouble, double* @vardouble +; CHECK: bl pow + + %powfp128 = call fp128 @llvm.pow.f128(fp128 %fp128, fp128 %fp128) + store fp128 %powfp128, fp128* @varfp128 +; CHECK: bl powl + + ret void +} + +declare float @llvm.powi.f32(float, i32) +declare double @llvm.powi.f64(double, i32) +declare fp128 @llvm.powi.f128(fp128, i32) + +define void @test_powi(float %float, double %double, i32 %exponent, fp128 %fp128) { +; CHECK: test_powi: + + %powifloat = call float @llvm.powi.f32(float %float, i32 %exponent) + store float %powifloat, float* @varfloat +; CHECK: bl __powisf2 + + %powidouble = call double @llvm.powi.f64(double %double, i32 %exponent) + store double %powidouble, double* @vardouble +; CHECK: bl __powidf2 + + %powifp128 = call fp128 @llvm.powi.f128(fp128 %fp128, i32 %exponent) + store fp128 %powifp128, fp128* @varfp128 +; CHECK: bl __powitf2 + ret void + +} + +define void @test_frem(float %float, double %double, fp128 %fp128) { +; CHECK: test_frem: + + %fremfloat = frem float %float, %float + store float %fremfloat, float* @varfloat +; CHECK: bl fmodf + + %fremdouble = frem double %double, %double + store double %fremdouble, double* @vardouble +; CHECK: bl fmod + + %fremfp128 = frem fp128 %fp128, %fp128 + store fp128 %fremfp128, fp128* @varfp128 +; CHECK: bl fmodl + + ret void +} diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll new file mode 100644 index 000000000000..d80be8f3a639 --- /dev/null +++ b/test/CodeGen/AArch64/init-array.ll @@ -0,0 +1,9 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s + +define internal void @_GLOBAL__I_a() section ".text.startup" { + ret void +} + +@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }] + +; CHECK: .section .init_array
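For context, @llvm.global_ctors as used in init-array.ll above is an appending array of { priority, function } pairs, and 65535 is the default priority front ends normally emit. A minimal sketch with two hypothetical constructors follows; the names and the 101 priority are illustrative only and are not taken from the test, which only covers a single default-priority entry landing in plain .init_array.

define internal void @ctor_a() { ret void }
define internal void @ctor_b() { ret void }

@llvm.global_ctors = appending global [2 x { i32, void ()* }]
    [{ i32, void ()* } { i32 101, void ()* @ctor_a },
     { i32, void ()* } { i32 65535, void ()* @ctor_b }]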
\ No newline at end of file diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badI.ll b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll new file mode 100644 index 000000000000..c39c57f05822 --- /dev/null +++ b/test/CodeGen/AArch64/inline-asm-constraints-badI.ll @@ -0,0 +1,7 @@ +; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s + +define void @foo() { + ; Out of range immediate for I. + call void asm sideeffect "add x0, x0, $0", "I"(i32 4096) + ret void +}
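For contrast with the rejected 4096 above, the same asm with an in-range immediate is expected to compile; the positive inline-asm-constraints.ll test later in this series exercises I with 0 and 4095, suggesting the accepted range is the usual add/sub immediate range 0-4095. A minimal sketch of the accepted form (not itself part of the test suite):

define void @foo_ok() {
  ; 4095 is the largest immediate the I constraint is shown to accept.
  call void asm sideeffect "add x0, x0, $0", "I"(i32 4095)
  ret void
}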
\ No newline at end of file diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll new file mode 100644 index 000000000000..47c5f98bf009 --- /dev/null +++ b/test/CodeGen/AArch64/inline-asm-constraints-badK.ll @@ -0,0 +1,7 @@ +; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s + +define void @foo() { + ; 32-bit bitpattern ending in 1101 can't be produced. + call void asm sideeffect "and w0, w0, $0", "K"(i32 13) + ret void +}
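The reason 13 cannot be produced: AArch64 and/orr/eor immediates encode only a rotated run of contiguous ones, replicated across the register, and 0b1101 has a gap in its ones. The masks used by the positive K tests further on do fit that scheme. A short worked contrast (assuming the standard bitmask-immediate encoding):

;   0xaaaaaaaa = 1010...1010  -> the 2-bit element "10" replicated, so it encodes
;   13         = 0...01101    -> ones at bits 0, 2 and 3, no single rotated run, so it is rejected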
\ No newline at end of file diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll new file mode 100644 index 000000000000..7a5b99e23b3d --- /dev/null +++ b/test/CodeGen/AArch64/inline-asm-constraints-badK2.ll @@ -0,0 +1,7 @@ +; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s + +define void @foo() { + ; 4294967296 (1 << 32) can't be represented as a 32-bit logical immediate for K. + call void asm sideeffect "and w0, w0, $0", "K"(i64 4294967296) + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/inline-asm-constraints-badL.ll b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll new file mode 100644 index 000000000000..4f0039865a35 --- /dev/null +++ b/test/CodeGen/AArch64/inline-asm-constraints-badL.ll @@ -0,0 +1,7 @@ +; RUN: not llc -mtriple=aarch64-none-linux-gnu < %s + +define void @foo() { + ; 64-bit bitpattern ending in 1101 can't be produced. + call void asm sideeffect "and x0, x0, $0", "L"(i32 13) + ret void +}
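The badK2 case a little above has a positive counterpart: 4294967296 (1 << 32) is rejected by the 32-bit K constraint, but the inline-asm-constraints.ll test that follows accepts exactly that value through the 64-bit L constraint. A minimal sketch of the accepted form, mirroring that positive test rather than adding anything new:

define void @l_ok() {
  ; A single set bit is a valid 64-bit logical immediate.
  call void asm sideeffect "and x0, x0, $0", "L"(i64 4294967296)
  ret void
}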
\ No newline at end of file diff --git a/test/CodeGen/AArch64/inline-asm-constraints.ll b/test/CodeGen/AArch64/inline-asm-constraints.ll new file mode 100644 index 000000000000..c232f3208cfa --- /dev/null +++ b/test/CodeGen/AArch64/inline-asm-constraints.ll @@ -0,0 +1,117 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +define i64 @test_inline_constraint_r(i64 %base, i32 %offset) { +; CHECK: test_inline_constraint_r: + %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 %base, i32 %offset) +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw + ret i64 %val +} + +define i16 @test_small_reg(i16 %lhs, i16 %rhs) { +; CHECK: test_small_reg: + %val = call i16 asm sideeffect "add $0, $1, $2, sxth", "=r,r,r"(i16 %lhs, i16 %rhs) +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth + ret i16 %val +} + +define i64 @test_inline_constraint_r_imm(i64 %base, i32 %offset) { +; CHECK: test_inline_constraint_r_imm: + %val = call i64 asm "add $0, $1, $2, sxtw", "=r,r,r"(i64 4, i32 12) +; CHECK: movz [[FOUR:x[0-9]+]], #4 +; CHECK: movz [[TWELVE:w[0-9]+]], #12 +; CHECK: add {{x[0-9]+}}, [[FOUR]], [[TWELVE]], sxtw + ret i64 %val +} + +; m is permitted to have a base/offset form. We don't do that +; currently though. +define i32 @test_inline_constraint_m(i32 *%ptr) { +; CHECK: test_inline_constraint_m: + %val = call i32 asm "ldr $0, $1", "=r,m"(i32 *%ptr) +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] + ret i32 %val +} + +@arr = global [8 x i32] zeroinitializer + +; Q should *never* have base/offset form even if given the chance. +define i32 @test_inline_constraint_Q(i32 *%ptr) { +; CHECK: test_inline_constraint_Q: + %val = call i32 asm "ldr $0, $1", "=r,Q"(i32* getelementptr([8 x i32]* @arr, i32 0, i32 1)) +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}] + ret i32 %val +} + +@dump = global fp128 zeroinitializer + +define void @test_inline_constraint_I() { +; CHECK: test_inline_constraint_I: + call void asm sideeffect "add x0, x0, $0", "I"(i32 0) + call void asm sideeffect "add x0, x0, $0", "I"(i64 4095) +; CHECK: add x0, x0, #0 +; CHECK: add x0, x0, #4095 + + ret void +} + +; Skip J because it's useless + +define void @test_inline_constraint_K() { +; CHECK: test_inline_constraint_K: + call void asm sideeffect "and w0, w0, $0", "K"(i32 2863311530) ; = 0xaaaaaaaa + call void asm sideeffect "and w0, w0, $0", "K"(i32 65535) +; CHECK: and w0, w0, #-1431655766 +; CHECK: and w0, w0, #65535 + + ret void +} + +define void @test_inline_constraint_L() { +; CHECK: test_inline_constraint_L: + call void asm sideeffect "and x0, x0, $0", "L"(i64 4294967296) ; = 0xaaaaaaaa + call void asm sideeffect "and x0, x0, $0", "L"(i64 65535) +; CHECK: and x0, x0, #4294967296 +; CHECK: and x0, x0, #65535 + + ret void +} + +; Skip M and N because we don't support MOV pseudo-instructions yet. 
+ +@var = global i32 0 + +define void @test_inline_constraint_S() { +; CHECK: test_inline_constraint_S: + call void asm sideeffect "adrp x0, $0", "S"(i32* @var) + call void asm sideeffect "adrp x0, ${0:A}", "S"(i32* @var) + call void asm sideeffect "add x0, x0, ${0:L}", "S"(i32* @var) +; CHECK: adrp x0, var +; CHECK: adrp x0, var +; CHECK: add x0, x0, #:lo12:var + ret void +} + +define i32 @test_inline_constraint_S_label(i1 %in) { +; CHECK: test_inline_constraint_S_label: + call void asm sideeffect "adr x0, $0", "S"(i8* blockaddress(@test_inline_constraint_S_label, %loc)) +; CHECK: adr x0, .Ltmp{{[0-9]+}} + br i1 %in, label %loc, label %loc2 +loc: + ret i32 0 +loc2: + ret i32 42 +} + +define void @test_inline_constraint_Y() { +; CHECK: test_inline_constraint_Y: + call void asm sideeffect "fcmp s0, $0", "Y"(float 0.0) +; CHECK: fcmp s0, #0.0 + ret void +} + +define void @test_inline_constraint_Z() { +; CHECK: test_inline_constraint_Z: + call void asm sideeffect "cmp w0, $0", "Z"(i32 0) +; CHECK: cmp w0, #0 + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/inline-asm-modifiers.ll b/test/CodeGen/AArch64/inline-asm-modifiers.ll new file mode 100644 index 000000000000..3b55945561eb --- /dev/null +++ b/test/CodeGen/AArch64/inline-asm-modifiers.ll @@ -0,0 +1,125 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-ELF %s + +@var_simple = hidden global i32 0 +@var_got = global i32 0 +@var_tlsgd = thread_local global i32 0 +@var_tlsld = thread_local(localdynamic) global i32 0 +@var_tlsie = thread_local(initialexec) global i32 0 +@var_tlsle = thread_local(localexec) global i32 0 + +define void @test_inline_modifier_L() nounwind { +; CHECK: test_inline_modifier_L: + call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_simple) + call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_got) + call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsgd) + call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsld) + call void asm sideeffect "ldr x0, [x0, ${0:L}]", "S,~{x0}"(i32* @var_tlsie) + call void asm sideeffect "add x0, x0, ${0:L}", "S,~{x0}"(i32* @var_tlsle) +; CHECK: add x0, x0, #:lo12:var_simple +; CHECK: ldr x0, [x0, #:got_lo12:var_got] +; CHECK: add x0, x0, #:tlsdesc_lo12:var_tlsgd +; CHECK: add x0, x0, #:dtprel_lo12:var_tlsld +; CHECK: ldr x0, [x0, #:gottprel_lo12:var_tlsie] +; CHECK: add x0, x0, #:tprel_lo12:var_tlsle + +; CHECK-ELF: R_AARCH64_ADD_ABS_LO12_NC var_simple +; CHECK-ELF: R_AARCH64_LD64_GOT_LO12_NC var_got +; CHECK-ELF: R_AARCH64_TLSDESC_ADD_LO12_NC var_tlsgd +; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_LO12 var_tlsld +; CHECK-ELF: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC var_tlsie +; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_LO12 var_tlsle + + ret void +} + +define void @test_inline_modifier_G() nounwind { +; CHECK: test_inline_modifier_G: + call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsld) + call void asm sideeffect "add x0, x0, ${0:G}, lsl #12", "S,~{x0}"(i32* @var_tlsle) +; CHECK: add x0, x0, #:dtprel_hi12:var_tlsld, lsl #12 +; CHECK: add x0, x0, #:tprel_hi12:var_tlsle, lsl #12 + +; CHECK-ELF: R_AARCH64_TLSLD_ADD_DTPREL_HI12 var_tlsld +; CHECK-ELF: R_AARCH64_TLSLE_ADD_TPREL_HI12 var_tlsle + + ret void +} + +define void @test_inline_modifier_A() nounwind { +; CHECK: test_inline_modifier_A: + call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_simple) + call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_got) + call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsgd) + call void asm sideeffect "adrp x0, ${0:A}", "S,~{x0}"(i32* @var_tlsie) + ; N.b. All tprel and dtprel relocs are modified: lo12 or granules. 
+; CHECK: adrp x0, var_simple +; CHECK: adrp x0, :got:var_got +; CHECK: adrp x0, :tlsdesc:var_tlsgd +; CHECK: adrp x0, :gottprel:var_tlsie + +; CHECK-ELF: R_AARCH64_ADR_PREL_PG_HI21 var_simple +; CHECK-ELF: R_AARCH64_ADR_GOT_PAGE var_got +; CHECK-ELF: R_AARCH64_TLSDESC_ADR_PAGE var_tlsgd +; CHECK-ELF: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 var_tlsie + + ret void +} + +define void @test_inline_modifier_wx(i32 %small, i64 %big) nounwind { +; CHECK: test_inline_modifier_wx: + call i32 asm sideeffect "add $0, $0, $0", "=r,0"(i32 %small) + call i32 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i32 %small) + call i32 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i32 %small) +; CHECK: //APP +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + + call i64 asm sideeffect "add $0, $0, $0", "=r,0"(i64 %big) + call i64 asm sideeffect "add ${0:w}, ${0:w}, ${0:w}", "=r,0"(i64 %big) + call i64 asm sideeffect "add ${0:x}, ${0:x}, ${0:x}", "=r,0"(i64 %big) +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + + call i32 asm sideeffect "add ${0:w}, ${1:w}, ${1:w}", "=r,r"(i32 0) + call i32 asm sideeffect "add ${0:x}, ${1:x}, ${1:x}", "=r,r"(i32 0) +; CHECK: add {{w[0-9]+}}, wzr, wzr +; CHECK: add {{x[0-9]+}}, xzr, xzr + ret void +} + +define void @test_inline_modifier_bhsdq() nounwind { +; CHECK: test_inline_modifier_bhsdq: + call float asm sideeffect "ldr ${0:b}, [sp]", "=w"() + call float asm sideeffect "ldr ${0:h}, [sp]", "=w"() + call float asm sideeffect "ldr ${0:s}, [sp]", "=w"() + call float asm sideeffect "ldr ${0:d}, [sp]", "=w"() + call float asm sideeffect "ldr ${0:q}, [sp]", "=w"() +; CHECK: ldr b0, [sp] +; CHECK: ldr h0, [sp] +; CHECK: ldr s0, [sp] +; CHECK: ldr d0, [sp] +; CHECK: ldr q0, [sp] + + call double asm sideeffect "ldr ${0:b}, [sp]", "=w"() + call double asm sideeffect "ldr ${0:h}, [sp]", "=w"() + call double asm sideeffect "ldr ${0:s}, [sp]", "=w"() + call double asm sideeffect "ldr ${0:d}, [sp]", "=w"() + call double asm sideeffect "ldr ${0:q}, [sp]", "=w"() +; CHECK: ldr b0, [sp] +; CHECK: ldr h0, [sp] +; CHECK: ldr s0, [sp] +; CHECK: ldr d0, [sp] +; CHECK: ldr q0, [sp] + ret void +} + +define void @test_inline_modifier_c() nounwind { +; CHECK: test_inline_modifier_c: + call void asm sideeffect "adr x0, ${0:c}", "i"(i32 3) +; CHECK: adr x0, 3 + + ret void +}
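A reading aid for test_inline_modifier_bhsdq above: the b, h, s, d and q modifiers all print the same underlying FP/SIMD register at different widths, which is why both the float and the double variants check for b0 through q0. As a rough architectural reference (not output of this test):

;   b0 = V0[7:0]   h0 = V0[15:0]   s0 = V0[31:0]   d0 = V0[63:0]   q0 = V0[127:0]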
\ No newline at end of file diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll new file mode 100644 index 000000000000..dcf9f4ed455c --- /dev/null +++ b/test/CodeGen/AArch64/jump-table.ll @@ -0,0 +1,56 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | elf-dump | FileCheck %s -check-prefix=CHECK-ELF + +define i32 @test_jumptable(i32 %in) { +; CHECK: test_jumptable + + switch i32 %in, label %def [ + i32 0, label %lbl1 + i32 1, label %lbl2 + i32 2, label %lbl3 + i32 4, label %lbl4 + ] +; CHECK: adrp [[JTPAGE:x[0-9]+]], .LJTI0_0 +; CHECK: add x[[JT:[0-9]+]], [[JTPAGE]], #:lo12:.LJTI0_0 +; CHECK: ldr [[DEST:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #3] +; CHECK: br [[DEST]] + +def: + ret i32 0 + +lbl1: + ret i32 1 + +lbl2: + ret i32 2 + +lbl3: + ret i32 4 + +lbl4: + ret i32 8 + +} + +; CHECK: .rodata + +; CHECK: .LJTI0_0: +; CHECK-NEXT: .xword +; CHECK-NEXT: .xword +; CHECK-NEXT: .xword +; CHECK-NEXT: .xword +; CHECK-NEXT: .xword + +; ELF tests: + +; First make sure we get a page/lo12 pair in .text to pick up the jump-table +; CHECK-ELF: .rela.text +; CHECK-ELF: ('r_sym', 0x00000008) +; CHECK-ELF-NEXT: ('r_type', 0x00000113) +; CHECK-ELF: ('r_sym', 0x00000008) +; CHECK-ELF-NEXT: ('r_type', 0x00000115) + +; Also check the targets in .rodata are relocated +; CHECK-ELF: .rela.rodata +; CHECK-ELF: ('r_sym', 0x00000005) +; CHECK-ELF-NEXT: ('r_type', 0x00000101)
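The raw elf-dump values in the CHECK-ELF lines above are easier to follow against the AArch64 ELF relocation table; assuming the standard AAELF64 numbering, they decode as below (the r_sym indices are left alone, since they depend on symbol-table layout):

;   0x113 = 275 = R_AARCH64_ADR_PREL_PG_HI21  (the adrp half of the page/lo12 pair)
;   0x115 = 277 = R_AARCH64_ADD_ABS_LO12_NC   (the :lo12: add half)
;   0x101 = 257 = R_AARCH64_ABS64             (the .xword jump-table entries in .rodata)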
\ No newline at end of file diff --git a/test/CodeGen/AArch64/large-frame.ll b/test/CodeGen/AArch64/large-frame.ll new file mode 100644 index 000000000000..2b2e1295c4f6 --- /dev/null +++ b/test/CodeGen/AArch64/large-frame.ll @@ -0,0 +1,114 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s +declare void @use_addr(i8*) + +@addr = global i8* null + +define void @test_bigframe() { +; CHECK: test_bigframe: + + %var1 = alloca i8, i32 20000000 + %var2 = alloca i8, i32 16 + %var3 = alloca i8, i32 20000000 +; CHECK: sub sp, sp, #496 +; CHECK: str x30, [sp, #488] + ; Total adjust is 39999536 +; CHECK: movz [[SUBCONST:x[0-9]+]], #22576 +; CHECK: movk [[SUBCONST]], #610, lsl #16 +; CHECK: sub sp, sp, [[SUBCONST]] + + ; Total offset is 20000024 +; CHECK: movz [[VAR1OFFSET:x[0-9]+]], #11544 +; CHECK: movk [[VAR1OFFSET]], #305, lsl #16 +; CHECK: add {{x[0-9]+}}, sp, [[VAR1OFFSET]] + store volatile i8* %var1, i8** @addr + + %var1plus2 = getelementptr i8* %var1, i32 2 + store volatile i8* %var1plus2, i8** @addr + +; CHECK: movz [[VAR2OFFSET:x[0-9]+]], #11528 +; CHECK: movk [[VAR2OFFSET]], #305, lsl #16 +; CHECK: add {{x[0-9]+}}, sp, [[VAR2OFFSET]] + store volatile i8* %var2, i8** @addr + + %var2plus2 = getelementptr i8* %var2, i32 2 + store volatile i8* %var2plus2, i8** @addr + + store volatile i8* %var3, i8** @addr + + %var3plus2 = getelementptr i8* %var3, i32 2 + store volatile i8* %var3plus2, i8** @addr + +; CHECK: movz [[ADDCONST:x[0-9]+]], #22576 +; CHECK: movk [[ADDCONST]], #610, lsl #16 +; CHECK: add sp, sp, [[ADDCONST]] + ret void +} + +define void @test_mediumframe() { +; CHECK: test_mediumframe: + %var1 = alloca i8, i32 1000000 + %var2 = alloca i8, i32 16 + %var3 = alloca i8, i32 1000000 +; CHECK: sub sp, sp, #496 +; CHECK: str x30, [sp, #488] +; CHECK: sub sp, sp, #688 +; CHECK-NEXT: sub sp, sp, #488, lsl #12 + + store volatile i8* %var1, i8** @addr +; CHECK: add [[VAR1ADDR:x[0-9]+]], sp, #600 +; CHECK: add [[VAR1ADDR]], [[VAR1ADDR]], #244, lsl #12 + + %var1plus2 = getelementptr i8* %var1, i32 2 + store volatile i8* %var1plus2, i8** @addr +; CHECK: add [[VAR1PLUS2:x[0-9]+]], {{x[0-9]+}}, #2 + + store volatile i8* %var2, i8** @addr +; CHECK: add [[VAR2ADDR:x[0-9]+]], sp, #584 +; CHECK: add [[VAR2ADDR]], [[VAR2ADDR]], #244, lsl #12 + + %var2plus2 = getelementptr i8* %var2, i32 2 + store volatile i8* %var2plus2, i8** @addr +; CHECK: add [[VAR2PLUS2:x[0-9]+]], {{x[0-9]+}}, #2 + + store volatile i8* %var3, i8** @addr + + %var3plus2 = getelementptr i8* %var3, i32 2 + store volatile i8* %var3plus2, i8** @addr + +; CHECK: add sp, sp, #688 +; CHECK: add sp, sp, #488, lsl #12 +; CHECK: ldr x30, [sp, #488] +; CHECK: add sp, sp, #496 + ret void +} + + +@bigspace = global [8 x i64] zeroinitializer + +; If temporary registers are allocated for adjustment, they should *not* clobber +; argument registers. 
+define void @test_tempallocation([8 x i64] %val) nounwind { +; CHECK: test_tempallocation: + %var = alloca i8, i32 1000000 +; CHECK: sub sp, sp, + +; Make sure the prologue is reasonably efficient +; CHECK-NEXT: stp x29, x30, [sp, +; CHECK-NEXT: stp x25, x26, [sp, +; CHECK-NEXT: stp x23, x24, [sp, +; CHECK-NEXT: stp x21, x22, [sp, +; CHECK-NEXT: stp x19, x20, [sp, + +; Make sure we don't trash an argument register +; CHECK-NOT: movz {{x[0-7],}} +; CHECK: sub sp, sp, + +; CHECK-NOT: movz {{x[0-7],}} + +; CHECK: bl use_addr + call void @use_addr(i8* %var) + + store [8 x i64] %val, [8 x i64]* @bigspace + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll new file mode 100644 index 000000000000..45935129fd7e --- /dev/null +++ b/test/CodeGen/AArch64/ldst-regoffset.ll @@ -0,0 +1,333 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var_8bit = global i8 0 +@var_16bit = global i16 0 +@var_32bit = global i32 0 +@var_64bit = global i64 0 + +@var_float = global float 0.0 +@var_double = global double 0.0 + +define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_8bit: + + %addr8_sxtw = getelementptr i8* %base, i32 %off32 + %val8_sxtw = load volatile i8* %addr8_sxtw + %val32_signed = sext i8 %val8_sxtw to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + %addr_lsl = getelementptr i8* %base, i64 %off64 + %val8_lsl = load volatile i8* %addr_lsl + %val32_unsigned = zext i8 %val8_lsl to i32 + store volatile i32 %val32_unsigned, i32* @var_32bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %addrint_uxtw = ptrtoint i8* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to i8* + %val8_uxtw = load volatile i8* %addr_uxtw + %newval8 = add i8 %val8_uxtw, 1 + store volatile i8 %newval8, i8* @var_8bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + ret void +} + + +define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_16bit: + + %addr8_sxtwN = getelementptr i16* %base, i32 %off32 + %val8_sxtwN = load volatile i16* %addr8_sxtwN + %val32_signed = sext i16 %val8_sxtwN to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #1] + + %addr_lslN = getelementptr i16* %base, i64 %off64 + %val8_lslN = load volatile i16* %addr_lslN + %val32_unsigned = zext i16 %val8_lslN to i32 + store volatile i32 %val32_unsigned, i32* @var_32bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #1] + + %addrint_uxtw = ptrtoint i16* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to i16* + %val8_uxtw = load volatile i16* %addr_uxtw + %newval8 = add i16 %val8_uxtw, 1 + store volatile i16 %newval8, i16* @var_16bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint i16* %base to i64 + %offset_sxtw = sext i32 %off32 to i64 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to i16* + %val16_sxtw = load volatile i16* %addr_sxtw + %val64_signed = sext i16 %val16_sxtw to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + + %base_lsl = ptrtoint i16* %base to i64 + %addrint_lsl = 
add i64 %base_lsl, %off64 + %addr_lsl = inttoptr i64 %addrint_lsl to i16* + %val16_lsl = load volatile i16* %addr_lsl + %val64_unsigned = zext i16 %val16_lsl to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %base_uxtwN = ptrtoint i16* %base to i64 + %offset_uxtwN = zext i32 %off32 to i64 + %offset2_uxtwN = shl i64 %offset_uxtwN, 1 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i16* + %val32 = load volatile i32* @var_32bit + %val16_trunc32 = trunc i32 %val32 to i16 + store volatile i16 %val16_trunc32, i16* %addr_uxtwN +; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #1] + ret void +} + +define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_32bit: + + %addr_sxtwN = getelementptr i32* %base, i32 %off32 + %val_sxtwN = load volatile i32* %addr_sxtwN + store volatile i32 %val_sxtwN, i32* @var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2] + + %addr_lslN = getelementptr i32* %base, i64 %off64 + %val_lslN = load volatile i32* %addr_lslN + store volatile i32 %val_lslN, i32* @var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2] + + %addrint_uxtw = ptrtoint i32* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to i32* + %val_uxtw = load volatile i32* %addr_uxtw + %newval8 = add i32 %val_uxtw, 1 + store volatile i32 %newval8, i32* @var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + + %base_sxtw = ptrtoint i32* %base to i64 + %offset_sxtw = sext i32 %off32 to i64 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to i32* + %val16_sxtw = load volatile i32* %addr_sxtw + %val64_signed = sext i32 %val16_sxtw to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + + %base_lsl = ptrtoint i32* %base to i64 + %addrint_lsl = add i64 %base_lsl, %off64 + %addr_lsl = inttoptr i64 %addrint_lsl to i32* + %val16_lsl = load volatile i32* %addr_lsl + %val64_unsigned = zext i32 %val16_lsl to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %base_uxtwN = ptrtoint i32* %base to i64 + %offset_uxtwN = zext i32 %off32 to i64 + %offset2_uxtwN = shl i64 %offset_uxtwN, 2 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32* + %val32 = load volatile i32* @var_32bit + store volatile i32 %val32, i32* %addr_uxtwN +; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2] + ret void +} + +define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_64bit: + + %addr_sxtwN = getelementptr i64* %base, i32 %off32 + %val_sxtwN = load volatile i64* %addr_sxtwN + store volatile i64 %val_sxtwN, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3] + + %addr_lslN = getelementptr i64* %base, i64 %off64 + %val_lslN = load volatile i64* %addr_lslN + store volatile i64 %val_lslN, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3] + + %addrint_uxtw = ptrtoint i64* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to i64* + %val8_uxtw = load volatile i64* %addr_uxtw + %newval8 = add i64 %val8_uxtw, 1 + store volatile i64 %newval8, 
i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint i64* %base to i64 + %offset_sxtw = sext i32 %off32 to i64 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to i64* + %val64_sxtw = load volatile i64* %addr_sxtw + store volatile i64 %val64_sxtw, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + %base_lsl = ptrtoint i64* %base to i64 + %addrint_lsl = add i64 %base_lsl, %off64 + %addr_lsl = inttoptr i64 %addrint_lsl to i64* + %val64_lsl = load volatile i64* %addr_lsl + store volatile i64 %val64_lsl, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %base_uxtwN = ptrtoint i64* %base to i64 + %offset_uxtwN = zext i32 %off32 to i64 + %offset2_uxtwN = shl i64 %offset_uxtwN, 3 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64* + %val64 = load volatile i64* @var_64bit + store volatile i64 %val64, i64* %addr_uxtwN +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3] + ret void +} + +define void @ldst_float(float* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_float: + + %addr_sxtwN = getelementptr float* %base, i32 %off32 + %val_sxtwN = load volatile float* %addr_sxtwN + store volatile float %val_sxtwN, float* @var_float +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #2] + + %addr_lslN = getelementptr float* %base, i64 %off64 + %val_lslN = load volatile float* %addr_lslN + store volatile float %val_lslN, float* @var_float +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2] + + %addrint_uxtw = ptrtoint float* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to float* + %val_uxtw = load volatile float* %addr_uxtw + store volatile float %val_uxtw, float* @var_float +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint float* %base to i64 + %offset_sxtw = sext i32 %off32 to i64 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to float* + %val64_sxtw = load volatile float* %addr_sxtw + store volatile float %val64_sxtw, float* @var_float +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + %base_lsl = ptrtoint float* %base to i64 + %addrint_lsl = add i64 %base_lsl, %off64 + %addr_lsl = inttoptr i64 %addrint_lsl to float* + %val64_lsl = load volatile float* %addr_lsl + store volatile float %val64_lsl, float* @var_float +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %base_uxtwN = ptrtoint float* %base to i64 + %offset_uxtwN = zext i32 %off32 to i64 + %offset2_uxtwN = shl i64 %offset_uxtwN, 2 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to float* + %val64 = load volatile float* @var_float + store volatile float %val64, float* %addr_uxtwN +; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2] + ret void +} + +define void @ldst_double(double* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_double: + + %addr_sxtwN = getelementptr double* %base, i32 %off32 + %val_sxtwN = load volatile double* %addr_sxtwN + store volatile double %val_sxtwN, double* @var_double +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #3] + + %addr_lslN = getelementptr double* %base, i64 %off64 + %val_lslN = load volatile double* %addr_lslN + store volatile double %val_lslN, double* @var_double +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, 
{{x[0-9]+}}, lsl #3] + + %addrint_uxtw = ptrtoint double* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to double* + %val_uxtw = load volatile double* %addr_uxtw + store volatile double %val_uxtw, double* @var_double +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint double* %base to i64 + %offset_sxtw = sext i32 %off32 to i64 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to double* + %val64_sxtw = load volatile double* %addr_sxtw + store volatile double %val64_sxtw, double* @var_double +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + %base_lsl = ptrtoint double* %base to i64 + %addrint_lsl = add i64 %base_lsl, %off64 + %addr_lsl = inttoptr i64 %addrint_lsl to double* + %val64_lsl = load volatile double* %addr_lsl + store volatile double %val64_lsl, double* @var_double +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %base_uxtwN = ptrtoint double* %base to i64 + %offset_uxtwN = zext i32 %off32 to i64 + %offset2_uxtwN = shl i64 %offset_uxtwN, 3 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to double* + %val64 = load volatile double* @var_double + store volatile double %val64, double* %addr_uxtwN +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3] + ret void +} + + +define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) { +; CHECK: ldst_128bit: + + %addr_sxtwN = getelementptr fp128* %base, i32 %off32 + %val_sxtwN = load volatile fp128* %addr_sxtwN + store volatile fp128 %val_sxtwN, fp128* %base +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw #4] + + %addr_lslN = getelementptr fp128* %base, i64 %off64 + %val_lslN = load volatile fp128* %addr_lslN + store volatile fp128 %val_lslN, fp128* %base +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #4] + + %addrint_uxtw = ptrtoint fp128* %base to i64 + %offset_uxtw = zext i32 %off32 to i64 + %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw + %addr_uxtw = inttoptr i64 %addrint1_uxtw to fp128* + %val_uxtw = load volatile fp128* %addr_uxtw + store volatile fp128 %val_uxtw, fp128* %base +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw] + + %base_sxtw = ptrtoint fp128* %base to i64 + %offset_sxtw = sext i32 %off32 to i64 + %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw + %addr_sxtw = inttoptr i64 %addrint_sxtw to fp128* + %val64_sxtw = load volatile fp128* %addr_sxtw + store volatile fp128 %val64_sxtw, fp128* %base +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw] + + %base_lsl = ptrtoint fp128* %base to i64 + %addrint_lsl = add i64 %base_lsl, %off64 + %addr_lsl = inttoptr i64 %addrint_lsl to fp128* + %val64_lsl = load volatile fp128* %addr_lsl + store volatile fp128 %val64_lsl, fp128* %base +; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}] + + %base_uxtwN = ptrtoint fp128* %base to i64 + %offset_uxtwN = zext i32 %off32 to i64 + %offset2_uxtwN = shl i64 %offset_uxtwN, 4 + %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN + %addr_uxtwN = inttoptr i64 %addrint_uxtwN to fp128* + %val64 = load volatile fp128* %base + store volatile fp128 %val64, fp128* %addr_uxtwN +; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #4] + ret void +} diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll b/test/CodeGen/AArch64/ldst-unscaledimm.ll new file mode 100644 index 000000000000..78a3c83c3dd8 --- /dev/null +++ 
b/test/CodeGen/AArch64/ldst-unscaledimm.ll @@ -0,0 +1,218 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var_8bit = global i8 0 +@var_16bit = global i16 0 +@var_32bit = global i32 0 +@var_64bit = global i64 0 + +@var_float = global float 0.0 +@var_double = global double 0.0 + +@varptr = global i8* null + +define void @ldst_8bit() { +; CHECK: ldst_8bit: + +; No architectural support for loads to 16-bit or 8-bit since we +; promote i8 during lowering. + %addr_8bit = load i8** @varptr + +; match a sign-extending load 8-bit -> 32-bit + %addr_sext32 = getelementptr i8* %addr_8bit, i64 -256 + %val8_sext32 = load volatile i8* %addr_sext32 + %val32_signed = sext i8 %val8_sext32 to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: ldursb {{w[0-9]+}}, [{{x[0-9]+}}, #-256] + +; match a zero-extending load volatile 8-bit -> 32-bit + %addr_zext32 = getelementptr i8* %addr_8bit, i64 -12 + %val8_zext32 = load volatile i8* %addr_zext32 + %val32_unsigned = zext i8 %val8_zext32 to i32 + store volatile i32 %val32_unsigned, i32* @var_32bit +; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-12] + +; match an any-extending load volatile 8-bit -> 32-bit + %addr_anyext = getelementptr i8* %addr_8bit, i64 -1 + %val8_anyext = load volatile i8* %addr_anyext + %newval8 = add i8 %val8_anyext, 1 + store volatile i8 %newval8, i8* @var_8bit +; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-1] + +; match a sign-extending load volatile 8-bit -> 64-bit + %addr_sext64 = getelementptr i8* %addr_8bit, i64 -5 + %val8_sext64 = load volatile i8* %addr_sext64 + %val64_signed = sext i8 %val8_sext64 to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldursb {{x[0-9]+}}, [{{x[0-9]+}}, #-5] + +; match a zero-extending load volatile 8-bit -> 64-bit. +; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits +; of x0 so it's identical to load volatileing to 32-bits. + %addr_zext64 = getelementptr i8* %addr_8bit, i64 -9 + %val8_zext64 = load volatile i8* %addr_zext64 + %val64_unsigned = zext i8 %val8_zext64 to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-9] + +; truncating store volatile 32-bits to 8-bits + %addr_trunc32 = getelementptr i8* %addr_8bit, i64 -256 + %val32 = load volatile i32* @var_32bit + %val8_trunc32 = trunc i32 %val32 to i8 + store volatile i8 %val8_trunc32, i8* %addr_trunc32 +; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-256] + +; truncating store volatile 64-bits to 8-bits + %addr_trunc64 = getelementptr i8* %addr_8bit, i64 -1 + %val64 = load volatile i64* @var_64bit + %val8_trunc64 = trunc i64 %val64 to i8 + store volatile i8 %val8_trunc64, i8* %addr_trunc64 +; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-1] + + ret void +} + +define void @ldst_16bit() { +; CHECK: ldst_16bit: + +; No architectural support for loads to 16-bit or 16-bit since we +; promote i16 during lowering. + %addr_8bit = load i8** @varptr + +; match a sign-extending load 16-bit -> 32-bit + %addr8_sext32 = getelementptr i8* %addr_8bit, i64 -256 + %addr_sext32 = bitcast i8* %addr8_sext32 to i16* + %val16_sext32 = load volatile i16* %addr_sext32 + %val32_signed = sext i16 %val16_sext32 to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: ldursh {{w[0-9]+}}, [{{x[0-9]+}}, #-256] + +; match a zero-extending load volatile 16-bit -> 32-bit. With offset that would be unaligned. 
+ %addr8_zext32 = getelementptr i8* %addr_8bit, i64 15 + %addr_zext32 = bitcast i8* %addr8_zext32 to i16* + %val16_zext32 = load volatile i16* %addr_zext32 + %val32_unsigned = zext i16 %val16_zext32 to i32 + store volatile i32 %val32_unsigned, i32* @var_32bit +; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #15] + +; match an any-extending load volatile 16-bit -> 32-bit + %addr8_anyext = getelementptr i8* %addr_8bit, i64 -1 + %addr_anyext = bitcast i8* %addr8_anyext to i16* + %val16_anyext = load volatile i16* %addr_anyext + %newval16 = add i16 %val16_anyext, 1 + store volatile i16 %newval16, i16* @var_16bit +; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #-1] + +; match a sign-extending load volatile 16-bit -> 64-bit + %addr8_sext64 = getelementptr i8* %addr_8bit, i64 -5 + %addr_sext64 = bitcast i8* %addr8_sext64 to i16* + %val16_sext64 = load volatile i16* %addr_sext64 + %val64_signed = sext i16 %val16_sext64 to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldursh {{x[0-9]+}}, [{{x[0-9]+}}, #-5] + +; match a zero-extending load volatile 16-bit -> 64-bit. +; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits +; of x0 so it's identical to load volatileing to 32-bits. + %addr8_zext64 = getelementptr i8* %addr_8bit, i64 9 + %addr_zext64 = bitcast i8* %addr8_zext64 to i16* + %val16_zext64 = load volatile i16* %addr_zext64 + %val64_unsigned = zext i16 %val16_zext64 to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #9] + +; truncating store volatile 32-bits to 16-bits + %addr8_trunc32 = getelementptr i8* %addr_8bit, i64 -256 + %addr_trunc32 = bitcast i8* %addr8_trunc32 to i16* + %val32 = load volatile i32* @var_32bit + %val16_trunc32 = trunc i32 %val32 to i16 + store volatile i16 %val16_trunc32, i16* %addr_trunc32 +; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-256] + +; truncating store volatile 64-bits to 16-bits + %addr8_trunc64 = getelementptr i8* %addr_8bit, i64 -1 + %addr_trunc64 = bitcast i8* %addr8_trunc64 to i16* + %val64 = load volatile i64* @var_64bit + %val16_trunc64 = trunc i64 %val64 to i16 + store volatile i16 %val16_trunc64, i16* %addr_trunc64 +; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-1] + + ret void +} + +define void @ldst_32bit() { +; CHECK: ldst_32bit: + + %addr_8bit = load i8** @varptr + +; Straight 32-bit load/store + %addr32_8_noext = getelementptr i8* %addr_8bit, i64 1 + %addr32_noext = bitcast i8* %addr32_8_noext to i32* + %val32_noext = load volatile i32* %addr32_noext + store volatile i32 %val32_noext, i32* %addr32_noext +; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #1] +; CHECK: stur {{w[0-9]+}}, [{{x[0-9]+}}, #1] + +; Zero-extension to 64-bits + %addr32_8_zext = getelementptr i8* %addr_8bit, i64 -256 + %addr32_zext = bitcast i8* %addr32_8_zext to i32* + %val32_zext = load volatile i32* %addr32_zext + %val64_unsigned = zext i32 %val32_zext to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #-256] +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit] + +; Sign-extension to 64-bits + %addr32_8_sext = getelementptr i8* %addr_8bit, i64 -12 + %addr32_sext = bitcast i8* %addr32_8_sext to i32* + %val32_sext = load volatile i32* %addr32_sext + %val64_signed = sext i32 %val32_sext to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldursw {{x[0-9]+}}, [{{x[0-9]+}}, #-12] +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit] + +; Truncation from 64-bits + %addr64_8_trunc = getelementptr i8* %addr_8bit, i64 255 + 
%addr64_trunc = bitcast i8* %addr64_8_trunc to i64* + %addr32_8_trunc = getelementptr i8* %addr_8bit, i64 -20 + %addr32_trunc = bitcast i8* %addr32_8_trunc to i32* + + %val64_trunc = load volatile i64* %addr64_trunc + %val32_trunc = trunc i64 %val64_trunc to i32 + store volatile i32 %val32_trunc, i32* %addr32_trunc +; CHECK: ldur {{x[0-9]+}}, [{{x[0-9]+}}, #255] +; CHECK: stur {{w[0-9]+}}, [{{x[0-9]+}}, #-20] + + ret void +} + +define void @ldst_float() { +; CHECK: ldst_float: + + %addr_8bit = load i8** @varptr + %addrfp_8 = getelementptr i8* %addr_8bit, i64 -5 + %addrfp = bitcast i8* %addrfp_8 to float* + + %valfp = load volatile float* %addrfp +; CHECK: ldur {{s[0-9]+}}, [{{x[0-9]+}}, #-5] + + store volatile float %valfp, float* %addrfp +; CHECK: stur {{s[0-9]+}}, [{{x[0-9]+}}, #-5] + + ret void +} + +define void @ldst_double() { +; CHECK: ldst_double: + + %addr_8bit = load i8** @varptr + %addrfp_8 = getelementptr i8* %addr_8bit, i64 4 + %addrfp = bitcast i8* %addrfp_8 to double* + + %valfp = load volatile double* %addrfp +; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #4] + + store volatile double %valfp, double* %addrfp +; CHECK: stur {{d[0-9]+}}, [{{x[0-9]+}}, #4] + + ret void +} diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll new file mode 100644 index 000000000000..1e7540d9be0a --- /dev/null +++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll @@ -0,0 +1,251 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var_8bit = global i8 0 +@var_16bit = global i16 0 +@var_32bit = global i32 0 +@var_64bit = global i64 0 + +@var_float = global float 0.0 +@var_double = global double 0.0 + +define void @ldst_8bit() { +; CHECK: ldst_8bit: + +; No architectural support for loads to 16-bit or 8-bit since we +; promote i8 during lowering. + +; match a sign-extending load 8-bit -> 32-bit + %val8_sext32 = load volatile i8* @var_8bit + %val32_signed = sext i8 %val8_sext32 to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: adrp {{x[0-9]+}}, var_8bit +; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + +; match a zero-extending load volatile 8-bit -> 32-bit + %val8_zext32 = load volatile i8* @var_8bit + %val32_unsigned = zext i8 %val8_zext32 to i32 + store volatile i32 %val32_unsigned, i32* @var_32bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + +; match an any-extending load volatile 8-bit -> 32-bit + %val8_anyext = load volatile i8* @var_8bit + %newval8 = add i8 %val8_anyext, 1 + store volatile i8 %newval8, i8* @var_8bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + +; match a sign-extending load volatile 8-bit -> 64-bit + %val8_sext64 = load volatile i8* @var_8bit + %val64_signed = sext i8 %val8_sext64 to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldrsb {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + +; match a zero-extending load volatile 8-bit -> 64-bit. +; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits +; of x0 so it's identical to load volatileing to 32-bits. 
+ %val8_zext64 = load volatile i8* @var_8bit + %val64_unsigned = zext i8 %val8_zext64 to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + +; truncating store volatile 32-bits to 8-bits + %val32 = load volatile i32* @var_32bit + %val8_trunc32 = trunc i32 %val32 to i8 + store volatile i8 %val8_trunc32, i8* @var_8bit +; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + +; truncating store volatile 64-bits to 8-bits + %val64 = load volatile i64* @var_64bit + %val8_trunc64 = trunc i64 %val64 to i8 + store volatile i8 %val8_trunc64, i8* @var_8bit +; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_8bit] + + ret void +} + +define void @ldst_16bit() { +; CHECK: ldst_16bit: + +; No architectural support for load volatiles to 16-bit promote i16 during +; lowering. + +; match a sign-extending load volatile 16-bit -> 32-bit + %val16_sext32 = load volatile i16* @var_16bit + %val32_signed = sext i16 %val16_sext32 to i32 + store volatile i32 %val32_signed, i32* @var_32bit +; CHECK: adrp {{x[0-9]+}}, var_16bit +; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + +; match a zero-extending load volatile 16-bit -> 32-bit + %val16_zext32 = load volatile i16* @var_16bit + %val32_unsigned = zext i16 %val16_zext32 to i32 + store volatile i32 %val32_unsigned, i32* @var_32bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + +; match an any-extending load volatile 16-bit -> 32-bit + %val16_anyext = load volatile i16* @var_16bit + %newval16 = add i16 %val16_anyext, 1 + store volatile i16 %newval16, i16* @var_16bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + +; match a sign-extending load volatile 16-bit -> 64-bit + %val16_sext64 = load volatile i16* @var_16bit + %val64_signed = sext i16 %val16_sext64 to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + +; match a zero-extending load volatile 16-bit -> 64-bit. +; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits +; of x0 so it's identical to load volatileing to 32-bits. 
+ %val16_zext64 = load volatile i16* @var_16bit + %val64_unsigned = zext i16 %val16_zext64 to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + +; truncating store volatile 32-bits to 16-bits + %val32 = load volatile i32* @var_32bit + %val16_trunc32 = trunc i32 %val32 to i16 + store volatile i16 %val16_trunc32, i16* @var_16bit +; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + +; truncating store volatile 64-bits to 16-bits + %val64 = load volatile i64* @var_64bit + %val16_trunc64 = trunc i64 %val64 to i16 + store volatile i16 %val16_trunc64, i16* @var_16bit +; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_16bit] + + ret void +} + +define void @ldst_32bit() { +; CHECK: ldst_32bit: + +; Straight 32-bit load/store + %val32_noext = load volatile i32* @var_32bit + store volatile i32 %val32_noext, i32* @var_32bit +; CHECK: adrp {{x[0-9]+}}, var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit] +; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit] + +; Zero-extension to 64-bits + %val32_zext = load volatile i32* @var_32bit + %val64_unsigned = zext i32 %val32_zext to i64 + store volatile i64 %val64_unsigned, i64* @var_64bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit] +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit] + +; Sign-extension to 64-bits + %val32_sext = load volatile i32* @var_32bit + %val64_signed = sext i32 %val32_sext to i64 + store volatile i64 %val64_signed, i64* @var_64bit +; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit] +; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit] + +; Truncation from 64-bits + %val64_trunc = load volatile i64* @var_64bit + %val32_trunc = trunc i64 %val64_trunc to i32 + store volatile i32 %val32_trunc, i32* @var_32bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_64bit] +; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_32bit] + + ret void +} + +@arr8 = global i8* null +@arr16 = global i16* null +@arr32 = global i32* null +@arr64 = global i64* null + +; Now check that our selection copes with accesses more complex than a +; single symbol. Permitted offsets should be folded into the loads and +; stores. Since all forms use the same Operand it's only necessary to +; check the various access-sizes involved. 
+ +define void @ldst_complex_offsets() { +; CHECK: ldst_complex_offsets + %arr8_addr = load volatile i8** @arr8 +; CHECK: adrp {{x[0-9]+}}, arr8 +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr8] + + %arr8_sub1_addr = getelementptr i8* %arr8_addr, i64 1 + %arr8_sub1 = load volatile i8* %arr8_sub1_addr + store volatile i8 %arr8_sub1, i8* @var_8bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #1] + + %arr8_sub4095_addr = getelementptr i8* %arr8_addr, i64 4095 + %arr8_sub4095 = load volatile i8* %arr8_sub4095_addr + store volatile i8 %arr8_sub4095, i8* @var_8bit +; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #4095] + + + %arr16_addr = load volatile i16** @arr16 +; CHECK: adrp {{x[0-9]+}}, arr16 +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr16] + + %arr16_sub1_addr = getelementptr i16* %arr16_addr, i64 1 + %arr16_sub1 = load volatile i16* %arr16_sub1_addr + store volatile i16 %arr16_sub1, i16* @var_16bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #2] + + %arr16_sub4095_addr = getelementptr i16* %arr16_addr, i64 4095 + %arr16_sub4095 = load volatile i16* %arr16_sub4095_addr + store volatile i16 %arr16_sub4095, i16* @var_16bit +; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #8190] + + + %arr32_addr = load volatile i32** @arr32 +; CHECK: adrp {{x[0-9]+}}, arr32 +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr32] + + %arr32_sub1_addr = getelementptr i32* %arr32_addr, i64 1 + %arr32_sub1 = load volatile i32* %arr32_sub1_addr + store volatile i32 %arr32_sub1, i32* @var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #4] + + %arr32_sub4095_addr = getelementptr i32* %arr32_addr, i64 4095 + %arr32_sub4095 = load volatile i32* %arr32_sub4095_addr + store volatile i32 %arr32_sub4095, i32* @var_32bit +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #16380] + + + %arr64_addr = load volatile i64** @arr64 +; CHECK: adrp {{x[0-9]+}}, arr64 +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #:lo12:arr64] + + %arr64_sub1_addr = getelementptr i64* %arr64_addr, i64 1 + %arr64_sub1 = load volatile i64* %arr64_sub1_addr + store volatile i64 %arr64_sub1, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8] + + %arr64_sub4095_addr = getelementptr i64* %arr64_addr, i64 4095 + %arr64_sub4095 = load volatile i64* %arr64_sub4095_addr + store volatile i64 %arr64_sub4095, i64* @var_64bit +; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #32760] + + ret void +} + +define void @ldst_float() { +; CHECK: ldst_float: + + %valfp = load volatile float* @var_float +; CHECK: adrp {{x[0-9]+}}, var_float +; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float] + + store volatile float %valfp, float* @var_float +; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_float] + + ret void +} + +define void @ldst_double() { +; CHECK: ldst_double: + + %valfp = load volatile double* @var_double +; CHECK: adrp {{x[0-9]+}}, var_double +; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double] + + store volatile double %valfp, double* @var_double +; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:var_double] + + ret void +} diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg new file mode 100644 index 000000000000..c5ce2411ed48 --- /dev/null +++ b/test/CodeGen/AArch64/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'AArch64' in targets: + config.unsupported = True + diff --git a/test/CodeGen/AArch64/literal_pools.ll b/test/CodeGen/AArch64/literal_pools.ll new file mode 100644 index 000000000000..e09084148fdf --- /dev/null +++ 
b/test/CodeGen/AArch64/literal_pools.ll @@ -0,0 +1,55 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @foo() { +; CHECK: foo: + %val32 = load i32* @var32 + %val64 = load i64* @var64 + + %val32_lit32 = and i32 %val32, 123456785 + store volatile i32 %val32_lit32, i32* @var32 +; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] +; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] + + %val64_lit32 = and i64 %val64, 305402420 + store volatile i64 %val64_lit32, i64* @var64 +; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] +; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] + + %val64_lit32signed = and i64 %val64, -12345678 + store volatile i64 %val64_lit32signed, i64* @var64 +; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] +; CHECK: ldrsw {{x[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] + + %val64_lit64 = and i64 %val64, 1234567898765432 + store volatile i64 %val64_lit64, i64* @var64 +; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]] +; CHECK: ldr {{x[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] + + ret void +} + +@varfloat = global float 0.0 +@vardouble = global double 0.0 + +define void @floating_lits() { +; CHECK: floating_lits: + + %floatval = load float* @varfloat + %newfloat = fadd float %floatval, 128.0 +; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]] +; CHECK: ldr {{s[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] +; CHECK: fadd + store float %newfloat, float* @varfloat + + %doubleval = load double* @vardouble + %newdouble = fadd double %doubleval, 129.0 +; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]] +; CHECK: ldr {{d[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]] +; CHECK: fadd + store double %newdouble, double* @vardouble + + ret void +} diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll new file mode 100644 index 000000000000..5cbf5a37ec54 --- /dev/null +++ b/test/CodeGen/AArch64/local_vars.ll @@ -0,0 +1,57 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 -disable-fp-elim | FileCheck -check-prefix CHECK-WITHFP %s + +; Make sure a reasonably sane prologue and epilogue are +; generated. This test is not robust in the face of an frame-handling +; evolving, but still has value for unrelated changes, I +; believe. +; +; In particular, it will fail when ldp/stp are used for frame setup, +; when FP-elim is implemented, and when addressing from FP is +; implemented. + +@var = global i64 0 +@local_addr = global i64* null + +declare void @foo() + +define void @trivial_func() nounwind { +; CHECK: trivial_func: // @trivial_func +; CHECK-NEXT: // BB#0 +; CHECK-NEXT: ret + + ret void +} + +define void @trivial_fp_func() { +; CHECK-WITHFP: trivial_fp_func: + +; CHECK-WITHFP: sub sp, sp, #16 +; CHECK-WITHFP: stp x29, x30, [sp] +; CHECK-WITHFP-NEXT: mov x29, sp + +; Dont't really care, but it would be a Bad Thing if this came after the epilogue. 
+; CHECK: bl foo + call void @foo() + ret void + +; CHECK-WITHFP: ldp x29, x30, [sp] +; CHECK-WITHFP: add sp, sp, #16 + +; CHECK-WITHFP: ret +} + +define void @stack_local() { + %local_var = alloca i64 +; CHECK: stack_local: +; CHECK: sub sp, sp, #16 + + %val = load i64* @var + store i64 %val, i64* %local_var +; CHECK: str {{x[0-9]+}}, [sp, #{{[0-9]+}}] + + store i64* %local_var, i64** @local_addr +; CHECK: add {{x[0-9]+}}, sp, #{{[0-9]+}} + + ret void +} diff --git a/test/CodeGen/AArch64/logical-imm.ll b/test/CodeGen/AArch64/logical-imm.ll new file mode 100644 index 000000000000..5f3f4da0cdad --- /dev/null +++ b/test/CodeGen/AArch64/logical-imm.ll @@ -0,0 +1,84 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_and(i32 %in32, i64 %in64) { +; CHECK: test_and: + + %val0 = and i32 %in32, 2863311530 + store volatile i32 %val0, i32* @var32 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa + + %val1 = and i32 %in32, 4293984240 + store volatile i32 %val1, i32* @var32 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0 + + %val2 = and i64 %in64, 9331882296111890817 + store volatile i64 %val2, i64* @var64 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181 + + %val3 = and i64 %in64, 18429855317404942275 + store volatile i64 %val3, i64* @var64 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3 + + ret void +} + +define void @test_orr(i32 %in32, i64 %in64) { +; CHECK: test_orr: + + %val0 = or i32 %in32, 2863311530 + store volatile i32 %val0, i32* @var32 +; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa + + %val1 = or i32 %in32, 4293984240 + store volatile i32 %val1, i32* @var32 +; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0 + + %val2 = or i64 %in64, 9331882296111890817 + store volatile i64 %val2, i64* @var64 +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181 + + %val3 = or i64 %in64, 18429855317404942275 + store volatile i64 %val3, i64* @var64 +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3 + + ret void +} + +define void @test_eor(i32 %in32, i64 %in64) { +; CHECK: test_eor: + + %val0 = xor i32 %in32, 2863311530 + store volatile i32 %val0, i32* @var32 +; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #0xaaaaaaaa + + %val1 = xor i32 %in32, 4293984240 + store volatile i32 %val1, i32* @var32 +; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0fff0 + + %val2 = xor i64 %in64, 9331882296111890817 + store volatile i64 %val2, i64* @var64 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, #0x8181818181818181 + + %val3 = xor i64 %in64, 18429855317404942275 + store volatile i64 %val3, i64* @var64 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, #0xffc3ffc3ffc3ffc3 + + ret void +} + +define void @test_mov(i32 %in32, i64 %in64) { +; CHECK: test_mov: + %val0 = add i32 %in32, 2863311530 + store i32 %val0, i32* @var32 +; CHECK: orr {{w[0-9]+}}, wzr, #0xaaaaaaaa + + %val1 = add i64 %in64, 11068046444225730969 + store i64 %val1, i64* @var64 +; CHECK: orr {{x[0-9]+}}, xzr, #0x9999999999999999 + + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll new file mode 100644 index 000000000000..bbbfcc1b9118 --- /dev/null +++ b/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -0,0 +1,224 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s + +@var1_32 = global i32 0 +@var2_32 = global i32 0 + +@var1_64 = global i64 0 +@var2_64 = global i64 0 + +define void @logical_32bit() { +; CHECK: 
logical_32bit: + %val1 = load i32* @var1_32 + %val2 = load i32* @var2_32 + + ; First check basic and/bic/or/orn/eor/eon patterns with no shift + %neg_val2 = xor i32 -1, %val2 + + %and_noshift = and i32 %val1, %val2 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %and_noshift, i32* @var1_32 + %bic_noshift = and i32 %neg_val2, %val1 +; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %bic_noshift, i32* @var1_32 + + %or_noshift = or i32 %val1, %val2 +; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %or_noshift, i32* @var1_32 + %orn_noshift = or i32 %neg_val2, %val1 +; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %orn_noshift, i32* @var1_32 + + %xor_noshift = xor i32 %val1, %val2 +; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %xor_noshift, i32* @var1_32 + %xorn_noshift = xor i32 %neg_val2, %val1 +; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} + store volatile i32 %xorn_noshift, i32* @var1_32 + + ; Check the maximum shift on each + %operand_lsl31 = shl i32 %val2, 31 + %neg_operand_lsl31 = xor i32 -1, %operand_lsl31 + + %and_lsl31 = and i32 %val1, %operand_lsl31 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + store volatile i32 %and_lsl31, i32* @var1_32 + %bic_lsl31 = and i32 %val1, %neg_operand_lsl31 +; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + store volatile i32 %bic_lsl31, i32* @var1_32 + + %or_lsl31 = or i32 %val1, %operand_lsl31 +; CHECK: orr {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + store volatile i32 %or_lsl31, i32* @var1_32 + %orn_lsl31 = or i32 %val1, %neg_operand_lsl31 +; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + store volatile i32 %orn_lsl31, i32* @var1_32 + + %xor_lsl31 = xor i32 %val1, %operand_lsl31 +; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + store volatile i32 %xor_lsl31, i32* @var1_32 + %xorn_lsl31 = xor i32 %val1, %neg_operand_lsl31 +; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31 + store volatile i32 %xorn_lsl31, i32* @var1_32 + + ; Check other shifts on a subset + %operand_asr10 = ashr i32 %val2, 10 + %neg_operand_asr10 = xor i32 -1, %operand_asr10 + + %bic_asr10 = and i32 %val1, %neg_operand_asr10 +; CHECK: bic {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #10 + store volatile i32 %bic_asr10, i32* @var1_32 + %xor_asr10 = xor i32 %val1, %operand_asr10 +; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, asr #10 + store volatile i32 %xor_asr10, i32* @var1_32 + + %operand_lsr1 = lshr i32 %val2, 1 + %neg_operand_lsr1 = xor i32 -1, %operand_lsr1 + + %orn_lsr1 = or i32 %val1, %neg_operand_lsr1 +; CHECK: orn {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #1 + store volatile i32 %orn_lsr1, i32* @var1_32 + %xor_lsr1 = xor i32 %val1, %operand_lsr1 +; CHECK: eor {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsr #1 + store volatile i32 %xor_lsr1, i32* @var1_32 + + %operand_ror20_big = shl i32 %val2, 12 + %operand_ror20_small = lshr i32 %val2, 20 + %operand_ror20 = or i32 %operand_ror20_big, %operand_ror20_small + %neg_operand_ror20 = xor i32 -1, %operand_ror20 + + %xorn_ror20 = xor i32 %val1, %neg_operand_ror20 +; CHECK: eon {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ror #20 + store volatile i32 %xorn_ror20, i32* @var1_32 + %and_ror20 = and i32 %val1, %operand_ror20 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, ror #20 + store volatile i32 %and_ror20, i32* @var1_32 + + ret void +} + +define void @logical_64bit() { +; CHECK: logical_64bit: + %val1 = load i64* @var1_64 + %val2 = load i64* 
@var2_64 + + ; First check basic and/bic/or/orn/eor/eon patterns with no shift + %neg_val2 = xor i64 -1, %val2 + + %and_noshift = and i64 %val1, %val2 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %and_noshift, i64* @var1_64 + %bic_noshift = and i64 %neg_val2, %val1 +; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %bic_noshift, i64* @var1_64 + + %or_noshift = or i64 %val1, %val2 +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %or_noshift, i64* @var1_64 + %orn_noshift = or i64 %neg_val2, %val1 +; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %orn_noshift, i64* @var1_64 + + %xor_noshift = xor i64 %val1, %val2 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %xor_noshift, i64* @var1_64 + %xorn_noshift = xor i64 %neg_val2, %val1 +; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} + store volatile i64 %xorn_noshift, i64* @var1_64 + + ; Check the maximum shift on each + %operand_lsl63 = shl i64 %val2, 63 + %neg_operand_lsl63 = xor i64 -1, %operand_lsl63 + + %and_lsl63 = and i64 %val1, %operand_lsl63 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 + store volatile i64 %and_lsl63, i64* @var1_64 + %bic_lsl63 = and i64 %val1, %neg_operand_lsl63 +; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 + store volatile i64 %bic_lsl63, i64* @var1_64 + + %or_lsl63 = or i64 %val1, %operand_lsl63 +; CHECK: orr {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 + store volatile i64 %or_lsl63, i64* @var1_64 + %orn_lsl63 = or i64 %val1, %neg_operand_lsl63 +; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 + store volatile i64 %orn_lsl63, i64* @var1_64 + + %xor_lsl63 = xor i64 %val1, %operand_lsl63 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 + store volatile i64 %xor_lsl63, i64* @var1_64 + %xorn_lsl63 = xor i64 %val1, %neg_operand_lsl63 +; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 + store volatile i64 %xorn_lsl63, i64* @var1_64 + + ; Check other shifts on a subset + %operand_asr10 = ashr i64 %val2, 10 + %neg_operand_asr10 = xor i64 -1, %operand_asr10 + + %bic_asr10 = and i64 %val1, %neg_operand_asr10 +; CHECK: bic {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #10 + store volatile i64 %bic_asr10, i64* @var1_64 + %xor_asr10 = xor i64 %val1, %operand_asr10 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, asr #10 + store volatile i64 %xor_asr10, i64* @var1_64 + + %operand_lsr1 = lshr i64 %val2, 1 + %neg_operand_lsr1 = xor i64 -1, %operand_lsr1 + + %orn_lsr1 = or i64 %val1, %neg_operand_lsr1 +; CHECK: orn {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #1 + store volatile i64 %orn_lsr1, i64* @var1_64 + %xor_lsr1 = xor i64 %val1, %operand_lsr1 +; CHECK: eor {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsr #1 + store volatile i64 %xor_lsr1, i64* @var1_64 + + ; Construct a rotate-right from a bunch of other logical + ; operations. 
DAGCombiner should ensure we the ROTR during + ; selection + %operand_ror20_big = shl i64 %val2, 44 + %operand_ror20_small = lshr i64 %val2, 20 + %operand_ror20 = or i64 %operand_ror20_big, %operand_ror20_small + %neg_operand_ror20 = xor i64 -1, %operand_ror20 + + %xorn_ror20 = xor i64 %val1, %neg_operand_ror20 +; CHECK: eon {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ror #20 + store volatile i64 %xorn_ror20, i64* @var1_64 + %and_ror20 = and i64 %val1, %operand_ror20 +; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, ror #20 + store volatile i64 %and_ror20, i64* @var1_64 + + ret void +} + +define void @flag_setting() { +; CHECK: flag_setting: + %val1 = load i64* @var1_64 + %val2 = load i64* @var2_64 + +; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}} +; CHECK: b.gt .L + %simple_and = and i64 %val1, %val2 + %tst1 = icmp sgt i64 %simple_and, 0 + br i1 %tst1, label %ret, label %test2 + +test2: +; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, lsl #63 +; CHECK: b.lt .L + %shifted_op = shl i64 %val2, 63 + %shifted_and = and i64 %val1, %shifted_op + %tst2 = icmp slt i64 %shifted_and, 0 + br i1 %tst2, label %ret, label %test3 + +test3: +; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}, asr #12 +; CHECK: b.gt .L + %asr_op = ashr i64 %val2, 12 + %asr_and = and i64 %asr_op, %val1 + %tst3 = icmp sgt i64 %asr_and, 0 + br i1 %tst3, label %ret, label %other_exit + +other_exit: + store volatile i64 %val1, i64* @var1_64 + ret void +ret: + ret void +} diff --git a/test/CodeGen/AArch64/logical_shifted_reg.s b/test/CodeGen/AArch64/logical_shifted_reg.s new file mode 100644 index 000000000000..89aea580119b --- /dev/null +++ b/test/CodeGen/AArch64/logical_shifted_reg.s @@ -0,0 +1,208 @@ + .file "/home/timnor01/a64-trunk/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll" + .text + .globl logical_32bit + .type logical_32bit,@function +logical_32bit: // @logical_32bit + .cfi_startproc +// BB#0: + adrp x0, var1_32 + ldr w1, [x0, #:lo12:var1_32] + adrp x0, var2_32 + ldr w2, [x0, #:lo12:var2_32] + and w3, w1, w2 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + bic w3, w1, w2 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + orr w3, w1, w2 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + orn w3, w1, w2 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eor w3, w1, w2 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eon w3, w2, w1 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + and w3, w1, w2, lsl #31 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + bic w3, w1, w2, lsl #31 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + orr w3, w1, w2, lsl #31 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + orn w3, w1, w2, lsl #31 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eor w3, w1, w2, lsl #31 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eon w3, w1, w2, lsl #31 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + bic w3, w1, w2, asr #10 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eor w3, w1, w2, asr #10 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + orn w3, w1, w2, lsr #1 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eor w3, w1, w2, lsr #1 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + eon w3, w1, w2, ror #20 + adrp x0, var1_32 + str w3, [x0, #:lo12:var1_32] + and w1, w1, w2, ror #20 + adrp x0, var1_32 + str w1, [x0, #:lo12:var1_32] + ret +.Ltmp0: + .size logical_32bit, .Ltmp0-logical_32bit + .cfi_endproc + + .globl logical_64bit + .type logical_64bit,@function +logical_64bit: // @logical_64bit + .cfi_startproc +// BB#0: + adrp x0, var1_64 + ldr x0, [x0, #:lo12:var1_64] + adrp x1, var2_64 + ldr x1, 
[x1, #:lo12:var2_64] + and x2, x0, x1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + bic x2, x0, x1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + orr x2, x0, x1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + orn x2, x0, x1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eor x2, x0, x1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eon x2, x1, x0 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + and x2, x0, x1, lsl #63 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + bic x2, x0, x1, lsl #63 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + orr x2, x0, x1, lsl #63 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + orn x2, x0, x1, lsl #63 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eor x2, x0, x1, lsl #63 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eon x2, x0, x1, lsl #63 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + bic x2, x0, x1, asr #10 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eor x2, x0, x1, asr #10 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + orn x2, x0, x1, lsr #1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eor x2, x0, x1, lsr #1 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + eon x2, x0, x1, ror #20 + adrp x3, var1_64 + str x2, [x3, #:lo12:var1_64] + and x0, x0, x1, ror #20 + adrp x1, var1_64 + str x0, [x1, #:lo12:var1_64] + ret +.Ltmp1: + .size logical_64bit, .Ltmp1-logical_64bit + .cfi_endproc + + .globl flag_setting + .type flag_setting,@function +flag_setting: // @flag_setting + .cfi_startproc +// BB#0: + sub sp, sp, #16 + adrp x0, var1_64 + ldr x0, [x0, #:lo12:var1_64] + adrp x1, var2_64 + ldr x1, [x1, #:lo12:var2_64] + tst x0, x1 + str x0, [sp, #8] // 8-byte Folded Spill + str x1, [sp] // 8-byte Folded Spill + b.gt .LBB2_4 + b .LBB2_1 +.LBB2_1: // %test2 + ldr x0, [sp, #8] // 8-byte Folded Reload + ldr x1, [sp] // 8-byte Folded Reload + tst x0, x1, lsl #63 + b.lt .LBB2_4 + b .LBB2_2 +.LBB2_2: // %test3 + ldr x0, [sp, #8] // 8-byte Folded Reload + ldr x1, [sp] // 8-byte Folded Reload + tst x0, x1, asr #12 + b.gt .LBB2_4 + b .LBB2_3 +.LBB2_3: // %other_exit + adrp x0, var1_64 + ldr x1, [sp, #8] // 8-byte Folded Reload + str x1, [x0, #:lo12:var1_64] + add sp, sp, #16 + ret +.LBB2_4: // %ret + add sp, sp, #16 + ret +.Ltmp2: + .size flag_setting, .Ltmp2-flag_setting + .cfi_endproc + + .type var1_32,@object // @var1_32 + .bss + .globl var1_32 + .align 2 +var1_32: + .word 0 // 0x0 + .size var1_32, 4 + + .type var2_32,@object // @var2_32 + .globl var2_32 + .align 2 +var2_32: + .word 0 // 0x0 + .size var2_32, 4 + + .type var1_64,@object // @var1_64 + .globl var1_64 + .align 3 +var1_64: + .xword 0 // 0x0 + .size var1_64, 8 + + .type var2_64,@object // @var2_64 + .globl var2_64 + .align 3 +var2_64: + .xword 0 // 0x0 + .size var2_64, 8 + + diff --git a/test/CodeGen/AArch64/movw-consts.ll b/test/CodeGen/AArch64/movw-consts.ll new file mode 100644 index 000000000000..b8a5fb932202 --- /dev/null +++ b/test/CodeGen/AArch64/movw-consts.ll @@ -0,0 +1,124 @@ +; RUN: llc -verify-machineinstrs -O0 < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +define i64 @test0() { +; CHECK: test0: +; Not produced by move wide instructions, but good to make sure we can return 0 anyway: +; CHECK: mov x0, xzr + ret i64 0 +} + +define i64 @test1() { +; CHECK: test1: +; CHECK: movz x0, #1 + ret i64 1 +} + +define i64 @test2() { +; CHECK: test2: +; CHECK: movz x0, #65535 + ret i64 65535 +} + +define i64 @test3() { +; CHECK: test3: +; CHECK: movz x0, #1, lsl #16 + ret i64 65536 +} + +define i64 @test4() { +; CHECK: test4: +; CHECK: 
movz x0, #65535, lsl #16 + ret i64 4294901760 +} + +define i64 @test5() { +; CHECK: test5: +; CHECK: movz x0, #1, lsl #32 + ret i64 4294967296 +} + +define i64 @test6() { +; CHECK: test6: +; CHECK: movz x0, #65535, lsl #32 + ret i64 281470681743360 +} + +define i64 @test7() { +; CHECK: test7: +; CHECK: movz x0, #1, lsl #48 + ret i64 281474976710656 +} + +; A 32-bit MOVN can generate some 64-bit patterns that a 64-bit one +; couldn't. Useful even for i64 +define i64 @test8() { +; CHECK: test8: +; CHECK: movn w0, #60875 + ret i64 4294906420 +} + +define i64 @test9() { +; CHECK: test9: +; CHECK: movn x0, #0 + ret i64 -1 +} + +define i64 @test10() { +; CHECK: test10: +; CHECK: movn x0, #60875, lsl #16 + ret i64 18446744069720047615 +} + +; For reasonably legitimate reasons returning an i32 results in the +; selection of an i64 constant, so we need a different idiom to test that selection +@var32 = global i32 0 + +define void @test11() { +; CHECK: test11: +; CHECK: mov {{w[0-9]+}}, wzr + store i32 0, i32* @var32 + ret void +} + +define void @test12() { +; CHECK: test12: +; CHECK: movz {{w[0-9]+}}, #1 + store i32 1, i32* @var32 + ret void +} + +define void @test13() { +; CHECK: test13: +; CHECK: movz {{w[0-9]+}}, #65535 + store i32 65535, i32* @var32 + ret void +} + +define void @test14() { +; CHECK: test14: +; CHECK: movz {{w[0-9]+}}, #1, lsl #16 + store i32 65536, i32* @var32 + ret void +} + +define void @test15() { +; CHECK: test15: +; CHECK: movz {{w[0-9]+}}, #65535, lsl #16 + store i32 4294901760, i32* @var32 + ret void +} + +define void @test16() { +; CHECK: test16: +; CHECK: movn {{w[0-9]+}}, #0 + store i32 -1, i32* @var32 + ret void +} + +define i64 @test17() { +; CHECK: test17: + + ; Mustn't MOVN w0 here. +; CHECK: movn x0, #2 + ret i64 -3 +} diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll new file mode 100644 index 000000000000..77bf691cbcbd --- /dev/null +++ b/test/CodeGen/AArch64/pic-eh-stubs.ll @@ -0,0 +1,60 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s + +; Make sure exception-handling PIC code can be linked correctly. An alternative +; to the sequence described below would have .gcc_except_table itself writable +; and not use the indirection, but this isn't what LLVM does right now. + + ; There should be a read-only .gcc_except_table section... +; CHECK: .section .gcc_except_table,"a" + + ; ... referring indirectly to stubs for its typeinfo ... +; CHECK: // @TType Encoding = indirect pcrel sdata8 + ; ... one of which is "int"'s typeinfo +; CHECK: .Ltmp9: +; CHECK-NEXT: .xword .L_ZTIi.DW.stub-.Ltmp9 + + ; .. and which is properly defined (in a writable section for the dynamic loader) later. 
+; CHECK: .section .data.rel,"aw" +; CHECK: .L_ZTIi.DW.stub: +; CHECK-NEXT: .xword _ZTIi + +@_ZTIi = external constant i8* + +define i32 @_Z3barv() { +entry: + invoke void @_Z3foov() + to label %return unwind label %lpad + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %1 = extractvalue { i8*, i32 } %0, 1 + %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind + %matches = icmp eq i32 %1, %2 + br i1 %matches, label %catch, label %eh.resume + +catch: ; preds = %lpad + %3 = extractvalue { i8*, i32 } %0, 0 + %4 = tail call i8* @__cxa_begin_catch(i8* %3) nounwind + %5 = bitcast i8* %4 to i32* + %exn.scalar = load i32* %5, align 4 + tail call void @__cxa_end_catch() nounwind + br label %return + +return: ; preds = %entry, %catch + %retval.0 = phi i32 [ %exn.scalar, %catch ], [ 42, %entry ] + ret i32 %retval.0 + +eh.resume: ; preds = %lpad + resume { i8*, i32 } %0 +} + +declare void @_Z3foov() + +declare i32 @__gxx_personality_v0(...) + +declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch()
\ No newline at end of file diff --git a/test/CodeGen/AArch64/regress-bitcast-formals.ll b/test/CodeGen/AArch64/regress-bitcast-formals.ll new file mode 100644 index 000000000000..28dc9a7e2515 --- /dev/null +++ b/test/CodeGen/AArch64/regress-bitcast-formals.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +; CallingConv.td requires a bitcast for vector arguments. Make sure we're +; actually capable of that (the test was omitted from LowerFormalArguments). + +define void @test_bitcast_lower(<2 x i32> %a) { +; CHECK: test_bitcast_lower: + + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/regress-f128csel-flags.ll b/test/CodeGen/AArch64/regress-f128csel-flags.ll new file mode 100644 index 000000000000..b35185ccd6f3 --- /dev/null +++ b/test/CodeGen/AArch64/regress-f128csel-flags.ll @@ -0,0 +1,27 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +; We used to not mark NZCV as being used in the continuation basic-block +; when lowering a 128-bit "select" to branches. This meant a subsequent use +; of the same flags gave an internal fault here. + +declare void @foo(fp128) + +define double @test_f128csel_flags(i32 %lhs, fp128 %a, fp128 %b) nounwind { +; CHECK: test_f128csel_flags + + %tst = icmp ne i32 %lhs, 42 + %val = select i1 %tst, fp128 %a, fp128 %b +; CHECK: cmp w0, #42 +; CHECK: b.eq .LBB0 + + call void @foo(fp128 %val) + %retval = select i1 %tst, double 4.0, double 5.0 + + ; It's also reasonably important that the actual fcsel comes before the + ; function call since bl may corrupt NZCV. We were doing the right thing anyway, + ; but just as well test it while we're here. +; CHECK: fcsel {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, ne +; CHECK: bl foo + + ret double %retval +} diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll new file mode 100644 index 000000000000..8d5485cae4c8 --- /dev/null +++ b/test/CodeGen/AArch64/regress-tail-livereg.ll @@ -0,0 +1,19 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s +@var = global void()* zeroinitializer + +declare void @bar() + +define void @foo() { +; CHECK: foo: + %func = load void()** @var + + ; Calling a function encourages @foo to use a callee-saved register, + ; which makes it a natural choice for the tail call itself. But we don't + ; want that: the final "br xN" has to use a temporary or argument + ; register. + call void @bar() + + tail call void %func() +; CHECK: br {{x([0-79]|1[0-8])}} + ret void +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll new file mode 100644 index 000000000000..e54552fd8edf --- /dev/null +++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll @@ -0,0 +1,36 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +; When generating DAG selection tables, TableGen used to only flag an +; instruction as needing a chain on its own account if it had a built-in pattern +; which used the chain. This meant that the AArch64 load/stores weren't +; recognised and so both loads from %locvar below were coalesced into a single +; LS8_LDR instruction (same operands other than the non-existent chain) and the +; increment was lost at return. + +; This was obviously a Bad Thing. + +declare void @bar(i8*) + +define i64 @test_chains() { +; CHECK: test_chains: + + %locvar = alloca i8 + + call void @bar(i8* %locvar) +; CHECK: bl bar + + %inc.1 = load i8* %locvar + %inc.2 = zext i8 %inc.1 to i64 + %inc.3 = add i64 %inc.2, 1 + %inc.4 = trunc i64 %inc.3 to i8 + store i8 %inc.4, i8* %locvar +; CHECK: ldrb {{w[0-9]+}}, [sp, [[LOCADDR:#[0-9]+]]] +; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #1 +; CHECK: strb {{w[0-9]+}}, [sp, [[LOCADDR]]] +; CHECK: ldrb {{w[0-9]+}}, [sp, [[LOCADDR]]] + + %ret.1 = load i8* %locvar + %ret.2 = zext i8 %ret.1 to i64 + ret i64 %ret.2 +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll new file mode 100644 index 000000000000..980e2ffef901 --- /dev/null +++ b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll @@ -0,0 +1,37 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s +@var = global i32 0 + +declare void @bar() + +define void @test_w29_reserved() { +; CHECK: test_w29_reserved: +; CHECK: add x29, sp, #{{[0-9]+}} + + %val1 = load volatile i32* @var + %val2 = load volatile i32* @var + %val3 = load volatile i32* @var + %val4 = load volatile i32* @var + %val5 = load volatile i32* @var + %val6 = load volatile i32* @var + %val7 = load volatile i32* @var + %val8 = load volatile i32* @var + %val9 = load volatile i32* @var + +; CHECK-NOT: ldr w29, + + ; Call to prevent fp-elim that occurs regardless in leaf functions. + call void @bar() + + store volatile i32 %val1, i32* @var + store volatile i32 %val2, i32* @var + store volatile i32 %val3, i32* @var + store volatile i32 %val4, i32* @var + store volatile i32 %val5, i32* @var + store volatile i32 %val6, i32* @var + store volatile i32 %val7, i32* @var + store volatile i32 %val8, i32* @var + store volatile i32 %val9, i32* @var + + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/regress-wzr-allocatable.ll b/test/CodeGen/AArch64/regress-wzr-allocatable.ll new file mode 100644 index 000000000000..764d2bc44f0d --- /dev/null +++ b/test/CodeGen/AArch64/regress-wzr-allocatable.ll @@ -0,0 +1,41 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 + +; When WZR wasn't marked as reserved, this function tried to allocate +; it at O0 and then generated an internal fault (mostly incidentally) +; when it discovered that it was already in use for a multiplication. + +; I'm not really convinced this is a good test since it could easily +; stop testing what it does now with no-one any the wiser. However, I +; can't think of a better way to force the allocator to use WZR +; specifically. 
+ +define void @test() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + br i1 undef, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond6 + +for.cond6: ; preds = %for.body9, %for.end + br i1 undef, label %for.body9, label %while.cond30 + +for.body9: ; preds = %for.cond6 + store i16 0, i16* undef, align 2 + %0 = load i32* undef, align 4 + %1 = load i32* undef, align 4 + %mul15 = mul i32 %0, %1 + %add16 = add i32 %mul15, 32768 + %div = udiv i32 %add16, 65535 + %add17 = add i32 %div, 1 + store i32 %add17, i32* undef, align 4 + br label %for.cond6 + +while.cond30: ; preds = %for.cond6 + ret void +} diff --git a/test/CodeGen/AArch64/setcc-takes-i32.ll b/test/CodeGen/AArch64/setcc-takes-i32.ll new file mode 100644 index 000000000000..d2eb77ab1b54 --- /dev/null +++ b/test/CodeGen/AArch64/setcc-takes-i32.ll @@ -0,0 +1,22 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +; The most important point here is that the promotion of the i1 works +; correctly. Previously LLVM thought that i64 was the appropriate SetCC output, +; which meant it proceeded in two steps and produced an i64 -> i64 any_ext which +; couldn't be selected and faulted. + +; It was expecting the smallest legal promotion of i1 to be the preferred SetCC +; type, so we'll satisfy it (this actually arguably gives better code anyway, +; with flag-manipulation operations allowed to use W-registers). + +declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) + +define i64 @test_select(i64 %lhs, i64 %rhs) { +; CHECK: test_select: + + %res = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %lhs, i64 %rhs) + %flag = extractvalue {i64, i1} %res, 1 + %retval = select i1 %flag, i64 %lhs, i64 %rhs + ret i64 %retval +; CHECK: ret +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll new file mode 100644 index 000000000000..a1ec618b03ba --- /dev/null +++ b/test/CodeGen/AArch64/sibling-call.ll @@ -0,0 +1,97 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +declare void @callee_stack0() +declare void @callee_stack8([8 x i32], i64) +declare void @callee_stack16([8 x i32], i64, i64) + +define void @caller_to0_from0() nounwind { +; CHECK: caller_to0_from0: +; CHECK-NEXT: // BB + tail call void @callee_stack0() + ret void +; CHECK-NEXT: b callee_stack0 +} + +define void @caller_to0_from8([8 x i32], i64) nounwind{ +; CHECK: caller_to0_from8: +; CHECK-NEXT: // BB + + tail call void @callee_stack0() + ret void +; CHECK-NEXT: b callee_stack0 +} + +define void @caller_to8_from0() { +; CHECK: caller_to8_from0: + +; Caller isn't going to clean up any extra stack we allocate, so it +; can't be a tail call. + tail call void @callee_stack8([8 x i32] undef, i64 42) + ret void +; CHECK: bl callee_stack8 +} + +define void @caller_to8_from8([8 x i32], i64 %a) { +; CHECK: caller_to8_from8: +; CHECK-NOT: sub sp, sp, + +; This should reuse our stack area for the 42 + tail call void @callee_stack8([8 x i32] undef, i64 42) + ret void +; CHECK: str {{x[0-9]+}}, [sp] +; CHECK-NEXT: b callee_stack8 +} + +define void @caller_to16_from8([8 x i32], i64 %a) { +; CHECK: caller_to16_from8: + +; Shouldn't be a tail call: we can't use SP+8 because our caller might +; have something there. This may sound obvious but implementation does +; some funky aligning. + tail call void @callee_stack16([8 x i32] undef, i64 undef, i64 undef) +; CHECK: bl callee_stack16 + ret void +} + +define void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) { +; CHECK: caller_to8_from24: +; CHECK-NOT: sub sp, sp + +; Reuse our area, putting "42" at incoming sp + tail call void @callee_stack8([8 x i32] undef, i64 42) + ret void +; CHECK: str {{x[0-9]+}}, [sp] +; CHECK-NEXT: b callee_stack8 +} + +define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { +; CHECK: caller_to16_from16: +; CHECK-NOT: sub sp, sp, + +; Here we want to make sure that both loads happen before the stores: +; otherwise either %a or %b will be wrongly clobbered. + tail call void @callee_stack16([8 x i32] undef, i64 %b, i64 %a) + ret void + +; CHECK: ldr x0, +; CHECK: ldr x1, +; CHECK: str x1, +; CHECK: str x0, + +; CHECK-NOT: add sp, sp, +; CHECK: b callee_stack16 +} + +@func = global void(i32)* null + +define void @indirect_tail() { +; CHECK: indirect_tail: +; CHECK-NOT: sub sp, sp + + %fptr = load void(i32)** @func + tail call void %fptr(i32 42) + ret void +; CHECK: movz w0, #42 +; CHECK: ldr [[FPTR:x[1-9]+]], [{{x[0-9]+}}, #:lo12:func] +; CHECK: br [[FPTR]] +}
\ No newline at end of file diff --git a/test/CodeGen/AArch64/sincos-expansion.ll b/test/CodeGen/AArch64/sincos-expansion.ll new file mode 100644 index 000000000000..c7a392b78c24 --- /dev/null +++ b/test/CodeGen/AArch64/sincos-expansion.ll @@ -0,0 +1,35 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s + +define float @test_sincos_f32(float %f) { + %sin = call float @sinf(float %f) readnone + %cos = call float @cosf(float %f) readnone +; CHECK: bl cosf +; CHECK: bl sinf + %val = fadd float %sin, %cos + ret float %val +} + +define double @test_sincos_f64(double %f) { + %sin = call double @sin(double %f) readnone + %cos = call double @cos(double %f) readnone + %val = fadd double %sin, %cos +; CHECK: bl cos +; CHECK: bl sin + ret double %val +} + +define fp128 @test_sincos_f128(fp128 %f) { + %sin = call fp128 @sinl(fp128 %f) readnone + %cos = call fp128 @cosl(fp128 %f) readnone + %val = fadd fp128 %sin, %cos +; CHECK: bl cosl +; CHECK: bl sinl + ret fp128 %val +} + +declare float @sinf(float) readonly +declare double @sin(double) readonly +declare fp128 @sinl(fp128) readonly +declare float @cosf(float) readonly +declare double @cos(double) readonly +declare fp128 @cosl(fp128) readonly
\ No newline at end of file diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll new file mode 100644 index 000000000000..f323b151ad1e --- /dev/null +++ b/test/CodeGen/AArch64/tail-call.ll @@ -0,0 +1,94 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s + +declare fastcc void @callee_stack0() +declare fastcc void @callee_stack8([8 x i32], i64) +declare fastcc void @callee_stack16([8 x i32], i64, i64) + +define fastcc void @caller_to0_from0() nounwind { +; CHECK: caller_to0_from0: +; CHECK-NEXT: // BB + tail call fastcc void @callee_stack0() + ret void +; CHECK-NEXT: b callee_stack0 +} + +define fastcc void @caller_to0_from8([8 x i32], i64) { +; CHECK: caller_to0_from8: + + tail call fastcc void @callee_stack0() + ret void +; CHECK: add sp, sp, #16 +; CHECK-NEXT: b callee_stack0 +} + +define fastcc void @caller_to8_from0() { +; CHECK: caller_to8_from0: +; CHECK: sub sp, sp, #32 + +; Key point is that the "42" should go #16 below incoming stack +; pointer (we didn't have arg space to reuse). + tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) + ret void +; CHECK: str {{x[0-9]+}}, [sp, #16] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: b callee_stack8 +} + +define fastcc void @caller_to8_from8([8 x i32], i64 %a) { +; CHECK: caller_to8_from8: +; CHECK: sub sp, sp, #16 + +; Key point is that the "%a" should go where at SP on entry. + tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) + ret void +; CHECK: str {{x[0-9]+}}, [sp, #16] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: b callee_stack8 +} + +define fastcc void @caller_to16_from8([8 x i32], i64 %a) { +; CHECK: caller_to16_from8: +; CHECK: sub sp, sp, #16 + +; Important point is that the call reuses the "dead" argument space +; above %a on the stack. If it tries to go below incoming-SP then the +; callee will not deallocate the space, even in fastcc. + tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2) +; CHECK: str {{x[0-9]+}}, [sp, #24] +; CHECK: str {{x[0-9]+}}, [sp, #16] +; CHECK: add sp, sp, #16 +; CHECK: b callee_stack16 + ret void +} + + +define fastcc void @caller_to8_from24([8 x i32], i64 %a, i64 %b, i64 %c) { +; CHECK: caller_to8_from24: +; CHECK: sub sp, sp, #16 + +; Key point is that the "%a" should go where at #16 above SP on entry. + tail call fastcc void @callee_stack8([8 x i32] undef, i64 42) + ret void +; CHECK: str {{x[0-9]+}}, [sp, #32] +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: b callee_stack8 +} + + +define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { +; CHECK: caller_to16_from16: +; CHECK: sub sp, sp, #16 + +; Here we want to make sure that both loads happen before the stores: +; otherwise either %a or %b will be wrongly clobbered. + tail call fastcc void @callee_stack16([8 x i32] undef, i64 %b, i64 %a) + ret void + +; CHECK: ldr x0, +; CHECK: ldr x1, +; CHECK: str x1, +; CHECK: str x0, + +; CHECK: add sp, sp, #16 +; CHECK: b callee_stack16 +} diff --git a/test/CodeGen/AArch64/tls-dynamic-together.ll b/test/CodeGen/AArch64/tls-dynamic-together.ll new file mode 100644 index 000000000000..bad2298c8a65 --- /dev/null +++ b/test/CodeGen/AArch64/tls-dynamic-together.ll @@ -0,0 +1,18 @@ +; RUN: llc -O0 -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s + +; If the .tlsdesccall and blr parts are emitted completely separately (even with +; glue) then LLVM will separate them quite happily (with a spill at O0, hence +; the option). 
This is definitely wrong, so we make sure they are emitted +; together. + +@general_dynamic_var = external thread_local global i32 + +define i32 @test_generaldynamic() { +; CHECK: test_generaldynamic: + + %val = load i32* @general_dynamic_var + ret i32 %val + +; CHECK: .tlsdesccall general_dynamic_var +; CHECK-NEXT: blr {{x[0-9]+}} +} diff --git a/test/CodeGen/AArch64/tls-dynamics.ll b/test/CodeGen/AArch64/tls-dynamics.ll new file mode 100644 index 000000000000..cdfd11783c23 --- /dev/null +++ b/test/CodeGen/AArch64/tls-dynamics.ll @@ -0,0 +1,121 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s + +@general_dynamic_var = external thread_local global i32 + +define i32 @test_generaldynamic() { +; CHECK: test_generaldynamic: + + %val = load i32* @general_dynamic_var + ret i32 %val + +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var +; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var] +; CHECK: .tlsdesccall general_dynamic_var +; CHECK-NEXT: blr [[CALLEE]] + +; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0 +; CHECK: ldr w0, [x[[TP]], x0] + +; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_CALL + +} + +define i32* @test_generaldynamic_addr() { +; CHECK: test_generaldynamic_addr: + + ret i32* @general_dynamic_var + +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var +; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:general_dynamic_var] +; CHECK: .tlsdesccall general_dynamic_var +; CHECK-NEXT: blr [[CALLEE]] + +; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0 +; CHECK: add x0, [[TP]], x0 + +; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_CALL + +} + +@local_dynamic_var = external thread_local(localdynamic) global i32 + +define i32 @test_localdynamic() { +; CHECK: test_localdynamic: + + %val = load i32* @local_dynamic_var + ret i32 %val + +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ +; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK-NEXT: blr [[CALLEE]] + +; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var +; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var + +; CHECK: ldr w0, [x0, [[DTP_OFFSET]]] + +; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_CALL + +} + +define i32* @test_localdynamic_addr() { +; CHECK: test_localdynamic_addr: + + ret i32* @local_dynamic_var + +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ +; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK-NEXT: blr [[CALLEE]] + +; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var +; CHECK: movk 
[[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var + +; CHECK: add x0, x0, [[DTP_OFFSET]] + +; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE +; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC +; CHECK-RELOC: R_AARCH64_TLSDESC_CALL + +} + +; The entire point of the local-dynamic access model is to have a single call to +; the expensive resolver. Make sure we achieve that goal. + +@local_dynamic_var2 = external thread_local(localdynamic) global i32 + +define i32 @test_localdynamic_deduplicate() { +; CHECK: test_localdynamic_deduplicate: + + %val = load i32* @local_dynamic_var + %val2 = load i32* @local_dynamic_var2 + + %sum = add i32 %val, %val2 + ret i32 %sum + +; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_ +; CHECK: add x0, x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_ +; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], #:tlsdesc_lo12:_TLS_MODULE_BASE_] +; CHECK: .tlsdesccall _TLS_MODULE_BASE_ +; CHECK-NEXT: blr [[CALLEE]] + +; CHECK-NOT: _TLS_MODULE_BASE_ + +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/tls-execs.ll b/test/CodeGen/AArch64/tls-execs.ll new file mode 100644 index 000000000000..a66588422793 --- /dev/null +++ b/test/CodeGen/AArch64/tls-execs.ll @@ -0,0 +1,63 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s + +@initial_exec_var = external thread_local(initialexec) global i32 + +define i32 @test_initial_exec() { +; CHECK: test_initial_exec: + %val = load i32* @initial_exec_var + +; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var +; CHECK: ldr x[[TP_OFFSET:[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var] +; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0 +; CHECK: ldr w0, [x[[TP]], x[[TP_OFFSET]]] + +; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 +; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC + + ret i32 %val +} + +define i32* @test_initial_exec_addr() { +; CHECK: test_initial_exec_addr: + ret i32* @initial_exec_var + +; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var +; CHECK: ldr [[TP_OFFSET:x[0-9]+]], [x[[GOTADDR]], #:gottprel_lo12:initial_exec_var] +; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0 +; CHECK: add x0, [[TP]], [[TP_OFFSET]] + +; CHECK-RELOC: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 +; CHECK-RELOC: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC + +} + +@local_exec_var = thread_local(initialexec) global i32 0 + +define i32 @test_local_exec() { +; CHECK: test_local_exec: + %val = load i32* @local_exec_var + +; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var +; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var +; CHECK: mrs x[[TP:[0-9]+]], tpidr_el0 +; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]] + +; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 +; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC + + ret i32 %val +} + +define i32* @test_local_exec_addr() { +; CHECK: test_local_exec_addr: + ret i32* @local_exec_var + +; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var +; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var +; CHECK: mrs [[TP:x[0-9]+]], tpidr_el0 +; CHECK: add x0, [[TP]], [[TP_OFFSET]] + +; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1 +; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC +} diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll new file mode 100644 index 000000000000..65c1fda49e2d --- /dev/null +++ b/test/CodeGen/AArch64/tst-br.ll @@ -0,0 +1,48 @@ +; RUN: llc 
-verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +; We've got the usual issues with LLVM reordering blocks here. The +; tests are correct for the current order, but who knows when that +; will change. Beware! +@var32 = global i32 0 +@var64 = global i64 0 + +define i32 @test_tbz() { +; CHECK: test_tbz: + + %val = load i32* @var32 + %val64 = load i64* @var64 + + %tbit0 = and i32 %val, 32768 + %tst0 = icmp ne i32 %tbit0, 0 + br i1 %tst0, label %test1, label %end1 +; CHECK: tbz {{w[0-9]+}}, #15, [[LBL_end1:.LBB0_[0-9]+]] + +test1: + %tbit1 = and i32 %val, 4096 + %tst1 = icmp ne i32 %tbit1, 0 + br i1 %tst1, label %test2, label %end1 +; CHECK: tbz {{w[0-9]+}}, #12, [[LBL_end1]] + +test2: + %tbit2 = and i64 %val64, 32768 + %tst2 = icmp ne i64 %tbit2, 0 + br i1 %tst2, label %test3, label %end1 +; CHECK: tbz {{x[0-9]+}}, #15, [[LBL_end1]] + +test3: + %tbit3 = and i64 %val64, 4096 + %tst3 = icmp ne i64 %tbit3, 0 + br i1 %tst3, label %end2, label %end1 +; CHECK: tbz {{x[0-9]+}}, #12, [[LBL_end1]] + +end2: +; CHECK: movz x0, #1 +; CHECK-NEXT: ret + ret i32 1 + +end1: +; CHECK: [[LBL_end1]]: +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: ret + ret i32 0 +} diff --git a/test/CodeGen/AArch64/variadic.ll b/test/CodeGen/AArch64/variadic.ll new file mode 100644 index 000000000000..c5d319eb112b --- /dev/null +++ b/test/CodeGen/AArch64/variadic.ll @@ -0,0 +1,144 @@ +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +%va_list = type {i8*, i8*, i8*, i32, i32} + +@var = global %va_list zeroinitializer + +declare void @llvm.va_start(i8*) + +define void @test_simple(i32 %n, ...) { +; CHECK: test_simple: +; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]] +; CHECK: mov x[[FPRBASE:[0-9]+]], sp +; CHECK: str q7, [x[[FPRBASE]], #112] +; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] +; CHECK: str x7, [x[[GPRBASE]], #48] + +; Omit the middle ones + +; CHECK: str q0, [sp] +; CHECK: str x1, [sp, #[[GPRFROMSP]]] + + %addr = bitcast %va_list* @var to i8* + call void @llvm.va_start(i8* %addr) +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; CHECK: movn [[VR_OFFS:w[0-9]+]], #127 +; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] +; CHECK: movn [[GR_OFFS:w[0-9]+]], #55 +; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24] +; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #128 +; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] +; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #56 +; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] +; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] +; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] + + ret void +} + +define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) 
{ +; CHECK: test_fewargs: +; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]] +; CHECK: mov x[[FPRBASE:[0-9]+]], sp +; CHECK: str q7, [x[[FPRBASE]], #96] +; CHECK: add x[[GPRBASE:[0-9]+]], sp, #[[GPRFROMSP:[0-9]+]] +; CHECK: str x7, [x[[GPRBASE]], #32] + +; Omit the middle ones + +; CHECK: str q1, [sp] +; CHECK: str x3, [sp, #[[GPRFROMSP]]] + + %addr = bitcast %va_list* @var to i8* + call void @llvm.va_start(i8* %addr) +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; CHECK: movn [[VR_OFFS:w[0-9]+]], #111 +; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] +; CHECK: movn [[GR_OFFS:w[0-9]+]], #39 +; CHECK: str [[GR_OFFS]], [x[[VA_LIST]], #24] +; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #112 +; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] +; CHECK: add [[GR_TOP:x[0-9]+]], x[[GPRBASE]], #40 +; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8] +; CHECK: add [[STACK:x[0-9]+]], sp, #[[STACKSIZE]] +; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] + + ret void +} + +define void @test_nospare([8 x i64], [8 x float], ...) { +; CHECK: test_nospare: + + %addr = bitcast %va_list* @var to i8* + call void @llvm.va_start(i8* %addr) +; CHECK-NOT: sub sp, sp +; CHECK: mov [[STACK:x[0-9]+]], sp +; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] + + ret void +} + +; If there are non-variadic arguments on the stack (here two i64s) then the +; __stack field should point just past them. +define void @test_offsetstack([10 x i64], [3 x float], ...) { +; CHECK: test_offsetstack: +; CHECK: sub sp, sp, #80 +; CHECK: mov x[[FPRBASE:[0-9]+]], sp +; CHECK: str q7, [x[[FPRBASE]], #64] + +; CHECK-NOT: str x{{[0-9]+}}, +; Omit the middle ones + +; CHECK: str q3, [sp] + + %addr = bitcast %va_list* @var to i8* + call void @llvm.va_start(i8* %addr) +; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var +; CHECK: movn [[VR_OFFS:w[0-9]+]], #79 +; CHECK: str [[VR_OFFS]], [x[[VA_LIST]], #28] +; CHECK: str wzr, [x[[VA_LIST]], #24] +; CHECK: add [[VR_TOP:x[0-9]+]], x[[FPRBASE]], #80 +; CHECK: str [[VR_TOP]], [x[[VA_LIST]], #16] +; CHECK: add [[STACK:x[0-9]+]], sp, #96 +; CHECK: str [[STACK]], [{{x[0-9]+}}, #:lo12:var] + + ret void +} + +declare void @llvm.va_end(i8*) + +define void @test_va_end() nounwind { +; CHECK: test_va_end: +; CHECK-NEXT: BB#0 + + %addr = bitcast %va_list* @var to i8* + call void @llvm.va_end(i8* %addr) + + ret void +; CHECK-NEXT: ret +} + +declare void @llvm.va_copy(i8* %dest, i8* %src) + +@second_list = global %va_list zeroinitializer + +define void @test_va_copy() { +; CHECK: test_va_copy: + %srcaddr = bitcast %va_list* @var to i8* + %dstaddr = bitcast %va_list* @second_list to i8* + call void @llvm.va_copy(i8* %dstaddr, i8* %srcaddr) + +; Check beginning and end again: + +; CHECK: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var] +; CHECK: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list] + +; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list +; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var + +; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24] +; CHECK: str [[BLOCK]], [x[[DEST_LIST]], #24] + + ret void +; CHECK: ret +} diff --git a/test/CodeGen/AArch64/zero-reg.ll b/test/CodeGen/AArch64/zero-reg.ll new file mode 100644 index 000000000000..fef0437ae7f3 --- /dev/null +++ b/test/CodeGen/AArch64/zero-reg.ll @@ -0,0 +1,31 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +@var32 = global i32 0 +@var64 = global i64 0 + +define void @test_zr() { +; CHECK: test_zr: + + store i32 0, i32* @var32 +; CHECK: str wzr, [{{x[0-9]+}}, #:lo12:var32] + store 
i64 0, i64* @var64 +; CHECK: str xzr, [{{x[0-9]+}}, #:lo12:var64] + + ret void +; CHECK: ret +} + +define void @test_sp(i32 %val) { +; CHECK: test_sp: + +; Important correctness point here is that LLVM doesn't try to use xzr +; as an addressing register: "str w0, [xzr]" is not a valid A64 +; instruction (0b11111 in the Rn field would mean "sp"). + %addr = getelementptr i32* null, i64 0 + store i32 %val, i32* %addr +; CHECK: mov x[[NULL:[0-9]+]], xzr +; CHECK: str {{w[0-9]+}}, [x[[NULL]]] + + ret void +; CHECK: ret +}
\ No newline at end of file