diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2013-06-10 20:36:52 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2013-06-10 20:36:52 +0000 |
| commit | 59d6cff90eecf31cb3dd860c4e786674cfdd42eb (patch) | |
| tree | 909310b2e05119d1d6efda049977042abbb58bb1 /test/CodeGen/ARM | |
| parent | 4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 (diff) | |
Notes
Diffstat (limited to 'test/CodeGen/ARM')
56 files changed, 1476 insertions, 451 deletions
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll index 91a9903f3852..112512ff59a5 100644 --- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -79,7 +79,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] !2 = metadata !{i32 786473, metadata !48} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ] !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} !5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] !6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] diff --git a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll index 36d15757c314..b253fefe87c4 100644 --- a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll +++ b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll @@ -1,36 +1,47 @@ ; RUN: llc %s -mtriple=arm-linux-gnueabi -filetype=obj -o - | \ -; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=BASIC %s +; RUN: llvm-readobj -s -sd | FileCheck -check-prefix=BASIC %s ; RUN: llc %s -mtriple=armv7-linux-gnueabi -march=arm -mcpu=cortex-a8 \ ; RUN: -mattr=-neon,-vfp3,+vfp2 \ ; RUN: -arm-reserve-r9 -filetype=obj -o - | \ -; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=CORTEXA8 %s +; RUN: llvm-readobj -s -sd | FileCheck -check-prefix=CORTEXA8 %s ; This tests that the extpected ARM attributes are emitted. 
; -; BASIC: .ARM.attributes -; BASIC-NEXT: 0x70000003 -; BASIC-NEXT: 0x00000000 -; BASIC-NEXT: 0x00000000 -; BASIC-NEXT: 0x0000003c -; BASIC-NEXT: 0x00000022 -; BASIC-NEXT: 0x00000000 -; BASIC-NEXT: 0x00000000 -; BASIC-NEXT: 0x00000001 -; BASIC-NEXT: 0x00000000 -; BASIC-NEXT: '41210000 00616561 62690001 17000000 060a0741 08010902 14011501 17031801 1901' +; BASIC: Section { +; BASIC: Name: .ARM.attributes +; BASIC-NEXT: Type: SHT_ARM_ATTRIBUTES +; BASIC-NEXT: Flags [ (0x0) +; BASIC-NEXT: ] +; BASIC-NEXT: Address: 0x0 +; BASIC-NEXT: Offset: 0x3C +; BASIC-NEXT: Size: 34 +; BASIC-NEXT: Link: 0 +; BASIC-NEXT: Info: 0 +; BASIC-NEXT: AddressAlignment: 1 +; BASIC-NEXT: EntrySize: 0 +; BASIC-NEXT: SectionData ( +; BASIC-NEXT: 0000: 41210000 00616561 62690001 17000000 +; BASIC-NEXT: 0010: 060A0741 08010902 14011501 17031801 +; BASIC-NEXT: 0020: 1901 +; BASIC-NEXT: ) -; CORTEXA8: .ARM.attributes -; CORTEXA8-NEXT: 0x70000003 -; CORTEXA8-NEXT: 0x00000000 -; CORTEXA8-NEXT: 0x00000000 -; CORTEXA8-NEXT: 0x0000003c -; CORTEXA8-NEXT: 0x0000002f -; CORTEXA8-NEXT: 0x00000000 -; CORTEXA8-NEXT: 0x00000000 -; CORTEXA8-NEXT: 0x00000001 -; CORTEXA8-NEXT: 0x00000000 -; CORTEXA8-NEXT: '412e0000 00616561 62690001 24000000 05434f52 5445582d 41380006 0a074108 0109020a 02140115 01170318 011901' +; CORTEXA8: Name: .ARM.attributes +; CORTEXA8-NEXT: Type: SHT_ARM_ATTRIBUTES +; CORTEXA8-NEXT: Flags [ (0x0) +; CORTEXA8-NEXT: ] +; CORTEXA8-NEXT: Address: 0x0 +; CORTEXA8-NEXT: Offset: 0x3C +; CORTEXA8-NEXT: Size: 47 +; CORTEXA8-NEXT: Link: 0 +; CORTEXA8-NEXT: Info: 0 +; CORTEXA8-NEXT: AddressAlignment: 1 +; CORTEXA8-NEXT: EntrySize: 0 +; CORTEXA8-NEXT: SectionData ( +; CORTEXA8-NEXT: 0000: 412E0000 00616561 62690001 24000000 +; CORTEXA8-NEXT: 0010: 05434F52 5445582D 41380006 0A074108 +; CORTEXA8-NEXT: 0020: 0109020A 02140115 01170318 011901 +; CORTEXA8-NEXT: ) define i32 @f(i64 %z) { ret i32 0 diff --git a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll index 
94a05412f5d4..9eecd045bfa0 100644 --- a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll +++ b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll @@ -1,5 +1,5 @@ ; RUN: llc %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \ -; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s +; RUN: llvm-readobj -s -sr -sd | FileCheck -check-prefix=OBJ %s target triple = "armv7-none-linux-gnueabi" @@ -9,32 +9,17 @@ define arm_aapcs_vfpcc i32 @barf() nounwind { entry: %0 = tail call arm_aapcs_vfpcc i32 @foo(i8* @a) nounwind ret i32 %0 -; OBJ: '.text' -; OBJ-NEXT: 'sh_type' -; OBJ-NEXT: 'sh_flags' -; OBJ-NEXT: 'sh_addr' -; OBJ-NEXT: 'sh_offset' -; OBJ-NEXT: 'sh_size' -; OBJ-NEXT: 'sh_link' -; OBJ-NEXT: 'sh_info' -; OBJ-NEXT: 'sh_addralign' -; OBJ-NEXT: 'sh_entsize' -; OBJ-NEXT: '_section_data', '00482de9 000000e3 000040e3 feffffeb 0088bde8' - -; OBJ: Relocation 0 -; OBJ-NEXT: 'r_offset', 0x00000004 -; OBJ-NEXT: 'r_sym', 0x000009 -; OBJ-NEXT: 'r_type', 0x2b - -; OBJ: Relocation 1 -; OBJ-NEXT: 'r_offset', 0x00000008 -; OBJ-NEXT: 'r_sym' -; OBJ-NEXT: 'r_type', 0x2c - -; OBJ: # Relocation 2 -; OBJ-NEXT: 'r_offset', 0x0000000c -; OBJ-NEXT: 'r_sym', 0x00000a -; OBJ-NEXT: 'r_type', 0x1c +; OBJ: Section { +; OBJ: Name: .text +; OBJ: Relocations [ +; OBJ-NEXT: 0x4 R_ARM_MOVW_ABS_NC a +; OBJ-NEXT: 0x8 R_ARM_MOVT_ABS +; OBJ-NEXT: 0xC R_ARM_CALL foo +; OBJ-NEXT: ] +; OBJ-NEXT: SectionData ( +; OBJ-NEXT: 0000: 00482DE9 000000E3 000040E3 FEFFFFEB +; OBJ-NEXT: 0010: 0088BDE8 +; OBJ-NEXT: ) } diff --git a/test/CodeGen/ARM/2010-12-08-tpsoft.ll b/test/CodeGen/ARM/2010-12-08-tpsoft.ll index b8ed8199d398..1351a26756ef 100644 --- a/test/CodeGen/ARM/2010-12-08-tpsoft.ll +++ b/test/CodeGen/ARM/2010-12-08-tpsoft.ll @@ -1,9 +1,9 @@ ; RUN: llc %s -mtriple=armv7-linux-gnueabi -o - | \ ; RUN: FileCheck -check-prefix=ELFASM %s ; RUN: llc %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \ -; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=ELFOBJ %s +; RUN: llvm-readobj -s -sd | FileCheck 
-check-prefix=ELFOBJ %s -;; Make sure that bl __aeabi_read_tp is materiazlied and fixed up correctly +;; Make sure that bl __aeabi_read_tp is materialized and fixed up correctly ;; in the obj case. @i = external thread_local global i32 @@ -24,19 +24,13 @@ bb: ; preds = %entry ; ELFASM: bl __aeabi_read_tp -; ELFOBJ: '.text' -; ELFOBJ-NEXT: 'sh_type' -; ELFOBJ-NEXT: 'sh_flags' -; ELFOBJ-NEXT: 'sh_addr' -; ELFOBJ-NEXT: 'sh_offset' -; ELFOBJ-NEXT: 'sh_size' -; ELFOBJ-NEXT: 'sh_link' -; ELFOBJ-NEXT: 'sh_info' -; ELFOBJ-NEXT: 'sh_addralign' -; ELFOBJ-NEXT: 'sh_entsize' -;;; BL __aeabi_read_tp is ---+ -;;; V -; ELFOBJ-NEXT: 00482de9 3c009fe5 00109fe7 feffffeb +; ELFOBJ: Sections [ +; ELFOBJ: Section { +; ELFOBJ: Name: .text +; ELFOBJ: SectionData ( +;;; BL __aeabi_read_tp is ---------+ +;;; V +; ELFOBJ-NEXT: 0000: 00482DE9 3C009FE5 00109FE7 FEFFFFEB bb1: ; preds = %entry diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll index 1272a257931d..f13bc1214a5a 100644 --- a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll +++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll @@ -1,5 +1,5 @@ ; RUN: llc %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \ -; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s +; RUN: llvm-readobj -s -t | FileCheck -check-prefix=OBJ %s ; RUN: llc %s -mtriple=armv7-linux-gnueabi -o - | \ ; RUN: FileCheck -check-prefix=ASM %s @@ -15,17 +15,20 @@ ; ASM-NEXT: .type _MergedGlobals,%object @ @_MergedGlobals +; OBJ: Sections [ +; OBJ: Section { +; OBJ: Index: 4 +; OBJ-NEXT: Name: .bss -; OBJ: Section 4 -; OBJ-NEXT: '.bss' - -; OBJ: 'array00' -; OBJ-NEXT: 'st_value', 0x00000000 -; OBJ-NEXT: 'st_size', 0x00000050 -; OBJ-NEXT: 'st_bind', 0x0 -; OBJ-NEXT: 'st_type', 0x1 -; OBJ-NEXT: 'st_other', 0x00 -; OBJ-NEXT: 'st_shndx', 0x0004 +; OBJ: Symbols [ +; OBJ: Symbol { +; OBJ: Name: array00 +; OBJ-NEXT: Value: 0x0 +; OBJ-NEXT: Size: 80 +; OBJ-NEXT: Binding: Local +; OBJ-NEXT: Type: Object +; OBJ-NEXT: Other: 0 +; 
OBJ-NEXT: Section: .bss define i32 @main(i32 %argc) nounwind { %1 = load i32* @sum, align 4 diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll index 1d1b89a34f9a..98c0af35ef9a 100644 --- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll @@ -79,7 +79,7 @@ entry: !0 = metadata !{i32 786478, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, metadata !""} ; [ DW_TAG_compile_unit ] +!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, metadata !41, metadata !""} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5, metadata !5} !5 = metadata !{i32 786468, metadata !1, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll index 266609b8ce69..7a7ca8e0d8d9 100644 --- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll @@ -74,7 +74,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, metadata !41, null} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 786478, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786453, metadata !2, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll index 1b21f7571d8e..9334bf36d805 100644 --- a/test/CodeGen/ARM/2011-12-14-machine-sink.ll +++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll @@ -15,13 +15,13 @@ for.cond: ; preds = 
%for.body, %entry for.body: ; preds = %for.cond %v.5 = select i1 undef, i32 undef, i32 0 - %0 = load i8* undef, align 1, !tbaa !0 + %0 = load i8* undef, align 1 %conv88 = zext i8 %0 to i32 %sub89 = sub nsw i32 0, %conv88 %v.8 = select i1 undef, i32 undef, i32 %sub89 - %1 = load i8* null, align 1, !tbaa !0 + %1 = load i8* null, align 1 %conv108 = zext i8 %1 to i32 - %2 = load i8* undef, align 1, !tbaa !0 + %2 = load i8* undef, align 1 %conv110 = zext i8 %2 to i32 %sub111 = sub nsw i32 %conv108, %conv110 %cmp112 = icmp slt i32 %sub111, 0 @@ -44,6 +44,3 @@ if.end299: ; preds = %for.body, %for.cond %s.10 = phi i32 [ %add172, %for.body ], [ 0, %for.cond ] ret i32 %s.10 } - -!0 = metadata !{metadata !"omnipotent char", metadata !1} -!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll index 926daafbb7f1..0f1c452b8678 100644 --- a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll +++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll @@ -18,7 +18,7 @@ bb3: ; preds = %bb4, %bb2 br i1 %tmp, label %bb4, label %bb67 bb4: ; preds = %bb3 - %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0 + %tmp5 = load <4 x i32>* undef, align 16 %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607> %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216> %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float> @@ -41,9 +41,9 @@ bb4: ; preds = %bb3 %tmp24 = trunc i128 %tmp23 to i64 %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0 %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1 - %tmp27 = load float* undef, align 4, !tbaa !2 + %tmp27 = load float* undef, align 4 %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3 - %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0 + %tmp29 = load <4 x i32>* undef, align 16 %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607> %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 
1065353216, i32 1065353216, i32 1065353216> %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float> @@ -52,10 +52,10 @@ bb4: ; preds = %bb3 %tmp35 = fmul <4 x float> %tmp34, undef %tmp36 = fmul <4 x float> %tmp35, undef %tmp37 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind - %tmp38 = load float* undef, align 4, !tbaa !2 + %tmp38 = load float* undef, align 4 %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0 %tmp40 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind - %tmp41 = load float* undef, align 4, !tbaa !2 + %tmp41 = load float* undef, align 4 %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3 %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer %tmp44 = fmul <4 x float> %tmp33, %tmp43 @@ -64,10 +64,10 @@ bb4: ; preds = %bb3 %tmp47 = fmul <4 x float> %tmp46, %tmp36 %tmp48 = fadd <4 x float> undef, %tmp47 %tmp49 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind - %tmp50 = load float* undef, align 4, !tbaa !2 + %tmp50 = load float* undef, align 4 %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3 %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind - %tmp54 = load float* %tmp52, align 4, !tbaa !2 + %tmp54 = load float* %tmp52, align 4 %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3 %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22 %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind @@ -99,7 +99,3 @@ declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwin declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone - -!0 = metadata !{metadata !"omnipotent char", metadata !1} -!1 = metadata !{metadata !"Simple C/C++ TBAA", null} -!2 = metadata !{metadata !"float", metadata !0} diff --git 
a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll index f1c85f1b41f5..61623ec1b6a4 100644 --- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll +++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll @@ -7,7 +7,7 @@ target triple = "armv7-none-linux-eabi" ; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE. define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 { bb: - %tmp = load <2 x float>* undef, align 8, !tbaa !0 + %tmp = load <2 x float>* undef, align 8 %tmp2 = extractelement <2 x float> %tmp, i32 0 %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0 %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1 @@ -70,6 +70,3 @@ entry: declare arm_aapcs_vfpcc void @bar(i8*, float, float, float) declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind - -!0 = metadata !{metadata !"omnipotent char", metadata !1} -!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll index 5f24e427c229..a9e2ebb7fe12 100644 --- a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll +++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll @@ -56,9 +56,9 @@ bb3: ; preds = %bb2 %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float> %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> - %tmp42 = load <4 x float>* null, align 16, !tbaa !0 + %tmp42 = load <4 x float>* null, align 16 %tmp43 = fmul <4 x float> %tmp42, %tmp41 - %tmp44 = load <4 x float>* undef, align 16, !tbaa !0 + %tmp44 = load <4 x float>* undef, align 16 %tmp45 = fadd <4 x float> undef, %tmp43 %tmp46 = fadd <4 x float> undef, %tmp45 %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64> @@ -108,7 
+108,7 @@ bb3: ; preds = %bb2 %tmp89 = fmul <4 x float> undef, %tmp88 %tmp90 = fadd <4 x float> %tmp89, undef %tmp91 = fadd <4 x float> undef, %tmp90 - store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0 + store <4 x float> %tmp91, <4 x float>* undef, align 16 unreachable bb92: ; preds = %bb2 @@ -116,6 +116,3 @@ bb92: ; preds = %bb2 } declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint - -!0 = metadata !{metadata !"omnipotent char", metadata !1} -!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll index 33ad187926bf..0843fdc4e75e 100644 --- a/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll +++ b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll @@ -9,16 +9,13 @@ define arm_aapcs_vfpcc void @foo() nounwind align 2 { ; <label>:1 ; preds = %0 %2 = shufflevector <1 x i64> zeroinitializer, <1 x i64> undef, <2 x i32> <i32 0, i32 1> %3 = bitcast <2 x i64> %2 to <4 x float> - store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0 - store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0 - store <4 x float> %3, <4 x float>* undef, align 16, !tbaa !0 + store <4 x float> zeroinitializer, <4 x float>* undef, align 16 + store <4 x float> zeroinitializer, <4 x float>* undef, align 16 + store <4 x float> %3, <4 x float>* undef, align 16 %4 = insertelement <4 x float> %3, float 8.000000e+00, i32 2 - store <4 x float> %4, <4 x float>* undef, align 16, !tbaa !0 + store <4 x float> %4, <4 x float>* undef, align 16 unreachable ; <label>:5 ; preds = %0 ret void } - -!0 = metadata !{metadata !"omnipotent char", metadata !1} -!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll index 6f50f279b5de..089dc9153afa 100644 --- a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll +++ 
b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll @@ -20,12 +20,9 @@ bb5: ; preds = %bb4 %tmp15 = shufflevector <2 x float> %tmp14, <2 x float> undef, <4 x i32> zeroinitializer %tmp16 = fmul <4 x float> zeroinitializer, %tmp15 %tmp17 = fadd <4 x float> %tmp16, %arg - store <4 x float> %tmp17, <4 x float>* undef, align 8, !tbaa !0 + store <4 x float> %tmp17, <4 x float>* undef, align 8 br label %bb18 bb18: ; preds = %bb5, %bb4 ret void } - -!0 = metadata !{metadata !"omnipotent char", metadata !1} -!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll index ca0964a05933..a288015d6016 100644 --- a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll +++ b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll @@ -26,18 +26,14 @@ ; CHECK: Successors: define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind { entry: - store volatile i32 65540, i32* %p1, align 4, !tbaa !0 - %0 = load volatile i32* %p2, align 4, !tbaa !0 + store volatile i32 65540, i32* %p1, align 4 + %0 = load volatile i32* %p2, align 4 ret i32 %0 } define i32 @f2(i32* nocapture %p1, i32* nocapture %p2) nounwind { entry: - store i32 65540, i32* %p1, align 4, !tbaa !0 - %0 = load i32* %p2, align 4, !tbaa !0 + store i32 65540, i32* %p1, align 4 + %0 = load i32* %p2, align 4 ret i32 %0 } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll index e4ad45bf526e..adb5c7e4b259 100644 --- a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll +++ b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll @@ -129,7 +129,7 @@ define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable %45 = fmul <4 x float> undef, undef %46 = fmul <4 x float> %45, %43 %47 = fmul <4 x float> undef, %44 - %48 = 
load <4 x float>* undef, align 8, !tbaa !1 + %48 = load <4 x float>* undef, align 8 %49 = bitcast <4 x float> %48 to <2 x i64> %50 = shufflevector <2 x i64> %49, <2 x i64> undef, <1 x i32> <i32 1> %51 = bitcast <1 x i64> %50 to <2 x float> @@ -145,10 +145,10 @@ define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable %61 = fmul <4 x float> %59, %60 %62 = fmul <4 x float> %61, <float 6.000000e+01, float 6.000000e+01, float 6.000000e+01, float 6.000000e+01> %63 = fadd <4 x float> %47, %62 - store <4 x float> %46, <4 x float>* undef, align 8, !tbaa !1 + store <4 x float> %46, <4 x float>* undef, align 8 call arm_aapcs_vfpcc void @bar(%0* undef, float 0.000000e+00) nounwind call arm_aapcs_vfpcc void @bar(%0* undef, float 0.000000e+00) nounwind - store <4 x float> %63, <4 x float>* undef, align 8, !tbaa !1 + store <4 x float> %63, <4 x float>* undef, align 8 unreachable ; <label>:64 ; preds = %41, %40 @@ -170,5 +170,3 @@ define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable declare arm_aapcs_vfpcc void @bar(%0*, float) !0 = metadata !{metadata !"branch_weights", i32 64, i32 4} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/ARM/2013-01-21-PR14992.ll b/test/CodeGen/ARM/2013-01-21-PR14992.ll index 38b9e0e8f086..05abdeda0f19 100644 --- a/test/CodeGen/ARM/2013-01-21-PR14992.ll +++ b/test/CodeGen/ARM/2013-01-21-PR14992.ll @@ -6,11 +6,11 @@ ;CHECK: foo: define i32 @foo(i32* %a) nounwind optsize { entry: - %0 = load i32* %a, align 4, !tbaa !0 + %0 = load i32* %a, align 4 %arrayidx1 = getelementptr inbounds i32* %a, i32 1 - %1 = load i32* %arrayidx1, align 4, !tbaa !0 + %1 = load i32* %arrayidx1, align 4 %arrayidx2 = getelementptr inbounds i32* %a, i32 2 - %2 = load i32* %arrayidx2, align 4, !tbaa !0 + %2 = load i32* %arrayidx2, align 4 %add.ptr = getelementptr inbounds i32* %a, i32 3 ;Make sure we do not have a duplicated register in the front 
of the reg list ;EXPECTED: ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], {{r[0-9]+}}, @@ -22,7 +22,3 @@ entry: } declare void @bar(i32*) optsize - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll new file mode 100644 index 000000000000..4a5ca9db0e50 --- /dev/null +++ b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll @@ -0,0 +1,73 @@ +;PR15293: ARM codegen ice - expected larger existing stack allocation +;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s + +;CHECK: foo: +;CHECK: sub sp, sp, #8 +;CHECK: push {r11, lr} +;CHECK: str r0, [sp, #12] +;CHECK: add r0, sp, #12 +;CHECK: bl fooUseParam +;CHECK: pop {r11, lr} +;CHECK: add sp, sp, #8 +;CHECK: mov pc, lr + +;CHECK: foo2: +;CHECK: sub sp, sp, #16 +;CHECK: push {r11, lr} +;CHECK: str r0, [sp, #12] +;CHECK: add r0, sp, #12 +;CHECK: str r2, [sp, #16] +;CHECK: bl fooUseParam +;CHECK: add r0, sp, #16 +;CHECK: bl fooUseParam +;CHECK: pop {r11, lr} +;CHECK: add sp, sp, #16 +;CHECK: mov pc, lr + +;CHECK: doFoo: +;CHECK: push {r11, lr} +;CHECK: ldr r0, +;CHECK: ldr r0, [r0] +;CHECK: bl foo +;CHECK: pop {r11, lr} +;CHECK: mov pc, lr + + +;CHECK: doFoo2: +;CHECK: push {r11, lr} +;CHECK: ldr r0, +;CHECK: mov r1, #0 +;CHECK: ldr r0, [r0] +;CHECK: mov r2, r0 +;CHECK: bl foo2 +;CHECK: pop {r11, lr} +;CHECK: mov pc, lr + + +%artz = type { i32 } +@static_val = constant %artz { i32 777 } + +declare void @fooUseParam(%artz* ) + +define void @foo(%artz* byval %s) { + call void @fooUseParam(%artz* %s) + ret void +} + +define void @foo2(%artz* byval %s, i32 %p, %artz* byval %s2) { + call void @fooUseParam(%artz* %s) + call void @fooUseParam(%artz* %s2) + ret void +} + + +define void @doFoo() { + call void @foo(%artz* byval @static_val) + ret void +} + +define void @doFoo2() { + call void 
@foo2(%artz* byval @static_val, i32 0, %artz* byval @static_val) + ret void +} + diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll new file mode 100644 index 000000000000..38d515f9227f --- /dev/null +++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll @@ -0,0 +1,95 @@ +;Check 5.5 Parameter Passing --> Stage C --> C.4 statement, when NSAA is not +;equal to SP. +; +; Our purpose: make NSAA != SP, and only after start to use GPRs. +; +;Co-Processor register candidates may be either in VFP or in stack, so after +;all VFP are allocated, stack is used. We can use stack without GPR allocation +;in that case, passing 9 f64 params, for example. +;First eight params goes to d0-d7, ninth one goes to the stack. +;Now, as 10th parameter, we pass i32, and it must go to R0. +; +;5.5 Parameter Passing, Stage C: +; +;C.2.cp If the argument is a CPRC then any co-processor registers in that class +;that are unallocated are marked as unavailable. The NSAA is adjusted upwards +;until it is correctly aligned for the argument and the argument is copied to +;the memory at the adjusted NSAA. The NSAA is further incremented by the size +;of the argument. The argument has now been allocated. +;... +;C.4 If the size in words of the argument is not more than r4 minus NCRN, the +;argument is copied into core registers, starting at the NCRN. The NCRN is +;incremented by the number of registers used. Successive registers hold the +;parts of the argument they would hold if its value were loaded into those +;registers from memory using an LDM instruction. The argument has now been +;allocated. +; +;What is actually checked here: +;Here we check that i32 param goes to r0. 
+; +;Current test-case was produced with command: +;arm-linux-gnueabihf-clang -mcpu=cortex-a9 params-to-GPR.c -S -O1 -emit-llvm +; +;// params-to-GRP.c: +; +;void fooUseI32(unsigned); +; +;void foo(long double p0, +; long double p1, +; long double p2, +; long double p3, +; long double p4, +; long double p5, +; long double p6, +; long double p7, +; long double p8, +; unsigned p9) { +; fooUseI32(p9); +;} +; +;void doFoo() { +; foo( 1,2,3,4,5,6,7,8,9, 43 ); +;} + +;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s +; +;CHECK: foo: +;CHECK-NOT: mov r0 +;CHECK-NOT: ldr r0 +;CHECK: bl fooUseI32 +;CHECK: doFoo: +;CHECK: movs r0, #43 +;CHECK: bl foo + +define void @foo(double %p0, ; --> D0 + double %p1, ; --> D1 + double %p2, ; --> D2 + double %p3, ; --> D3 + double %p4, ; --> D4 + double %p5, ; --> D5 + double %p6, ; --> D6 + double %p7, ; --> D7 + double %p8, ; --> Stack + i32 %p9) #0 { ; --> R0, not Stack+8 +entry: + tail call void @fooUseI32(i32 %p9) + ret void +} + +declare void @fooUseI32(i32) + +define void @doFoo() { +entry: + tail call void @foo(double 23.0, ; --> D0 + double 23.1, ; --> D1 + double 23.2, ; --> D2 + double 23.3, ; --> D3 + double 23.4, ; --> D4 + double 23.5, ; --> D5 + double 23.6, ; --> D6 + double 23.7, ; --> D7 + double 23.8, ; --> Stack + i32 43) ; --> R0, not Stack+8 + ret void +} + diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll new file mode 100644 index 000000000000..446403d79cac --- /dev/null +++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll @@ -0,0 +1,61 @@ +;Check 5.5 Parameter Passing --> Stage C --> C.5 statement, when NSAA is not +;equal to SP. +; +; Our purpose: make NSAA != SP, and only after start to use GPRs, then pass +; byval parameter and check that it goes to stack only. +; +;Co-Processor register candidates may be either in VFP or in stack, so after +;all VFP are allocated, stack is used. 
We can use stack without GPR allocation +;in that case, passing 9 f64 params, for example. +;First eight params goes to d0-d7, ninth one goes to the stack. +;Now, as 10th parameter, we pass i32, and it must go to R0. +; +;For more information, +;please, read 5.5 Parameter Passing, Stage C, stages C.2.cp, C.4 and C.5 +; +; +;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s + +%struct_t = type { i32, i32, i32, i32 } +@static_val = constant %struct_t { i32 777, i32 888, i32 999, i32 1000 } +declare void @fooUseStruct(%struct_t*) + +define void @foo2(double %p0, ; --> D0 + double %p1, ; --> D1 + double %p2, ; --> D2 + double %p3, ; --> D3 + double %p4, ; --> D4 + double %p5, ; --> D5 + double %p6, ; --> D6 + double %p7, ; --> D7 + double %p8, ; --> Stack + i32 %p9, ; --> R0 + %struct_t* byval %p10) ; --> Stack+8 +{ +entry: +;CHECK: push.w {r11, lr} +;CHECK-NOT: stm +;CHECK: add r0, sp, #16 +;CHECK: bl fooUseStruct + call void @fooUseStruct(%struct_t* %p10) + + ret void +} + +define void @doFoo2() { +entry: +;CHECK-NOT: ldm + tail call void @foo2(double 23.0, ; --> D0 + double 23.1, ; --> D1 + double 23.2, ; --> D2 + double 23.3, ; --> D3 + double 23.4, ; --> D4 + double 23.5, ; --> D5 + double 23.6, ; --> D6 + double 23.7, ; --> D7 + double 23.8, ; --> Stack + i32 43, ; --> R0, not Stack+8 + %struct_t* byval @static_val) ; --> Stack+8, not R1 + ret void +} + diff --git a/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll b/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll index 2561686c1f83..459992818749 100644 --- a/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll +++ b/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll @@ -1,18 +1,17 @@ ; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp | FileCheck %s -; The test is presented by Jiangning Liu. -;CHECK-NOT: vldmia +; PR14824. The test is presented by Jiangning Liu. If the ld/st optimization algorithm is changed, this test case may fail. 
+; Also, if the machine code for the ld/st optimizer is changed, this test case may fail. If so, remove this test.
1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef> %vecinit35.i.i700 = shufflevector <8 x i64> %vecinit28.i.i699, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef> %vecinit42.i.i701 = shufflevector <8 x i64> %vecinit35.i.i700, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef> %vecinit49.i.i702 = shufflevector <8 x i64> %vecinit42.i.i701, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15> %arrayidx72 = getelementptr inbounds <8 x i64> * %dest, i32 6 store <8 x i64> %vecinit49.i.i702, <8 x i64> * %arrayidx72, align 64 - %arrayidx75 = getelementptr inbounds <8 x i64> * %source, i32 7 - %s140 = load <8 x i64> * %arrayidx75, align 64 %arrayidx78 = getelementptr inbounds <8 x i64> * %secondSource, i32 7 %s141 = load <8 x i64> * %arrayidx78, align 64 - %s142 = bitcast <8 x i64> %s140 to i512 - %data.i.i650.32.extract.shift = lshr i512 %s142, 256 - %data.i.i650.32.extract.trunc = trunc i512 %data.i.i650.32.extract.shift to i64 - %s143 = insertelement <8 x i64> undef, i64 %data.i.i650.32.extract.trunc, i32 0 - %s144 = insertelement <8 x i64> %s143, i64 %data.i.i650.32.extract.trunc, i32 1 - %data.i.i650.16.extract.shift = lshr i512 %s142, 128 - %data.i.i650.16.extract.trunc = trunc i512 %data.i.i650.16.extract.shift to i64 - %s145 = insertelement <8 x i64> %s144, i64 %data.i.i650.16.extract.trunc, i32 2 - %data.i.i650.8.extract.shift = lshr i512 %s142, 64 - %data.i.i650.8.extract.trunc = trunc i512 %data.i.i650.8.extract.shift to i64 - %s146 = insertelement <8 x i64> %s145, i64 %data.i.i650.8.extract.trunc, i32 3 - %s147 = insertelement <8 x i64> %s146, i64 %data.i.i650.8.extract.trunc, i32 4 - %data.i.i650.48.extract.shift = lshr i512 %s142, 384 - %data.i.i650.48.extract.trunc = trunc i512 %data.i.i650.48.extract.shift to i64 - %s148 = insertelement <8 x i64> %s147, i64 %data.i.i650.48.extract.trunc, i32 5 - %s149 = insertelement <8 x i64> %s148, i64 
%data.i.i650.16.extract.trunc, i32 6 - %data.i.i650.0.extract.trunc = trunc i512 %s142 to i64 - %s150 = insertelement <8 x i64> %s149, i64 %data.i.i650.0.extract.trunc, i32 7 %s151 = bitcast <8 x i64> %s141 to i512 %data.i1.i649.32.extract.shift = lshr i512 %s151, 256 %data.i1.i649.32.extract.trunc = trunc i512 %data.i1.i649.32.extract.shift to i64 @@ -90,21 +76,7 @@ entry: %data.i1.i649.8.extract.shift = lshr i512 %s151, 64 %data.i1.i649.8.extract.trunc = trunc i512 %data.i1.i649.8.extract.shift to i64 %s155 = insertelement <8 x i64> %s154, i64 %data.i1.i649.8.extract.trunc, i32 3 - %s156 = insertelement <8 x i64> %s155, i64 %data.i1.i649.8.extract.trunc, i32 4 - %data.i1.i649.48.extract.shift = lshr i512 %s151, 384 - %data.i1.i649.48.extract.trunc = trunc i512 %data.i1.i649.48.extract.shift to i64 - %s157 = insertelement <8 x i64> %s156, i64 %data.i1.i649.48.extract.trunc, i32 5 - %s158 = insertelement <8 x i64> %s157, i64 %data.i1.i649.16.extract.trunc, i32 6 - %data.i1.i649.0.extract.trunc = trunc i512 %s151 to i64 - %s159 = insertelement <8 x i64> %s158, i64 %data.i1.i649.0.extract.trunc, i32 7 - %vecinit7.i.i669 = shufflevector <8 x i64> %s159, <8 x i64> %s150, <8 x i32> <i32 0, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> - %vecinit14.i.i670 = shufflevector <8 x i64> %vecinit7.i.i669, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 10, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> - %vecinit21.i.i671 = shufflevector <8 x i64> %vecinit14.i.i670, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 undef, i32 undef, i32 undef, i32 undef> - %vecinit28.i.i672 = shufflevector <8 x i64> %vecinit21.i.i671, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef> - %vecinit35.i.i673 = shufflevector <8 x i64> %vecinit28.i.i672, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef> - %vecinit42.i.i674 = shufflevector <8 x i64> %vecinit35.i.i673, <8 
x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef> - %vecinit49.i.i675 = shufflevector <8 x i64> %vecinit42.i.i674, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15> %arrayidx83 = getelementptr inbounds <8 x i64> * %dest, i32 7 - store <8 x i64> %vecinit49.i.i675, <8 x i64> * %arrayidx83, align 64 + store <8 x i64> %s155, <8 x i64> * %arrayidx83, align 64 ret void } diff --git a/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll new file mode 100644 index 000000000000..de5fd31e2f2d --- /dev/null +++ b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll @@ -0,0 +1,28 @@ +;Check 5.5 Parameter Passing --> Stage C --> C.1.cp statement for VA functions. +;Note: There are no VFP CPRCs in a variadic procedure. +;Check that after %C was sent to stack, we set Next Core Register Number to R4. + +;This test is simplified IR version of +;test-suite/SingleSource/UnitTests/2002-05-02-ManyArguments.c + +;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s + +@.str = private unnamed_addr constant [13 x i8] c"%d %d %f %i\0A\00", align 1 + +;CHECK: printfn: +define void @printfn(i32 %a, i16 signext %b, double %C, i8 signext %E) { +entry: + %conv = sext i16 %b to i32 + %conv1 = sext i8 %E to i32 + %call = tail call i32 (i8*, ...)* @printf( + i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), ; --> R0 + i32 %a, ; --> R1 + i32 %conv, ; --> R2 + double %C, ; --> SP, NCRN := R4 +;CHECK: str r2, [sp, #8] + i32 %conv1) ; --> SP+8 + ret void +} + +declare i32 @printf(i8* nocapture, ...) + diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll new file mode 100644 index 000000000000..6db71fed958e --- /dev/null +++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll @@ -0,0 +1,48 @@ +;Check AAPCS, 5.5 Parameters Passing, C4 and C5 rules. 
+;Check case when NSAA != 0, and NCRN < R4, NCRN+ParamSize < R4 +;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s + +%st_t = type { i32, i32 } +@static_val = constant %st_t { i32 777, i32 888} + +declare void @fooUseStruct(%st_t*) + +define void @foo(double %vfp0, ; --> D0, NSAA=SP + double %vfp1, ; --> D1, NSAA=SP + double %vfp2, ; --> D2, NSAA=SP + double %vfp3, ; --> D3, NSAA=SP + double %vfp4, ; --> D4, NSAA=SP + double %vfp5, ; --> D5, NSAA=SP + double %vfp6, ; --> D6, NSAA=SP + double %vfp7, ; --> D7, NSAA=SP + double %vfp8, ; --> SP, NSAA=SP+8 (!) + i32 %p0, ; --> R0, NSAA=SP+8 + %st_t* byval %p1, ; --> R1, R2, NSAA=SP+8 + i32 %p2, ; --> R3, NSAA=SP+8 + i32 %p3) #0 { ; --> SP+4, NSAA=SP+12 +entry: + ;CHECK: sub sp, #8 + ;CHECK: push.w {r11, lr} + ;CHECK: add r0, sp, #16 + ;CHECK: str r2, [sp, #20] + ;CHECK: str r1, [sp, #16] + ;CHECK: bl fooUseStruct + call void @fooUseStruct(%st_t* %p1) + ret void +} + +define void @doFoo() { +entry: + call void @foo(double 23.0, + double 23.1, + double 23.2, + double 23.3, + double 23.4, + double 23.5, + double 23.6, + double 23.7, + double 23.8, + i32 0, %st_t* byval @static_val, i32 1, i32 2) + ret void +} + diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll new file mode 100644 index 000000000000..212bbc2ee9c8 --- /dev/null +++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll @@ -0,0 +1,45 @@ +;Check AAPCS, 5.5 Parameters Passing, C4 and C5 rules. 
+;Check case when NSAA != 0, and NCRN < R4, NCRN+ParamSize > R4 +;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s + +%st_t = type { i32, i32, i32, i32 } +@static_val = constant %st_t { i32 777, i32 888, i32 787, i32 878} + +define void @foo(double %vfp0, ; --> D0, NSAA=SP + double %vfp1, ; --> D1, NSAA=SP + double %vfp2, ; --> D2, NSAA=SP + double %vfp3, ; --> D3, NSAA=SP + double %vfp4, ; --> D4, NSAA=SP + double %vfp5, ; --> D5, NSAA=SP + double %vfp6, ; --> D6, NSAA=SP + double %vfp7, ; --> D7, NSAA=SP + double %vfp8, ; --> SP, NSAA=SP+8 (!) + i32 %p0, ; --> R0, NSAA=SP+8 + %st_t* byval %p1, ; --> SP+8, 4 words NSAA=SP+24 + i32 %p2) #0 { ; --> SP+24, NSAA=SP+24 + +entry: + ;CHECK: push.w {r11, lr} + ;CHECK: ldr r0, [sp, #32] + ;CHECK: bl fooUseI32 + call void @fooUseI32(i32 %p2) + ret void +} + +declare void @fooUseI32(i32) + +define void @doFoo() { +entry: + call void @foo(double 23.0, + double 23.1, + double 23.2, + double 23.3, + double 23.4, + double 23.5, + double 23.6, + double 23.7, + double 23.8, + i32 0, %st_t* byval @static_val, i32 1) + ret void +} + diff --git a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll new file mode 100644 index 000000000000..abc6e0d11144 --- /dev/null +++ b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll @@ -0,0 +1,71 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s +; rdar://13782395 + +define i32 @t1(i32 %a, i32 %b, i8** %retaddr) { +; CHECK: t1: +; CHECK: Block address taken +; CHECK-NOT: Address of block that was removed by CodeGen + store i8* blockaddress(@t1, %cond_true), i8** %retaddr + %tmp2 = icmp eq i32 %a, 0 + br i1 %tmp2, label %cond_false, label %cond_true + +cond_true: + %tmp5 = add i32 %b, 1 + ret i32 %tmp5 + +cond_false: + %tmp7 = add i32 %b, -1 + ret i32 %tmp7 +} + +define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d, i8** %retaddr) { +; CHECK: t2: +; CHECK: Block address taken +; CHECK: %cond_true +; CHECK: add +; 
CHECK: bx lr + store i8* blockaddress(@t2, %cond_true), i8** %retaddr + %tmp2 = icmp sgt i32 %c, 10 + %tmp5 = icmp slt i32 %d, 4 + %tmp8 = and i1 %tmp5, %tmp2 + %tmp13 = add i32 %b, %a + br i1 %tmp8, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp15 = add i32 %tmp13, %c + %tmp1821 = sub i32 %tmp15, %d + ret i32 %tmp1821 + +UnifiedReturnBlock: + ret i32 %tmp13 +} + +define hidden fastcc void @t3(i8** %retaddr) { +; CHECK: t3: +; CHECK: Block address taken +; CHECK-NOT: Address of block that was removed by CodeGen +bb: + store i8* blockaddress(@t3, %KBBlockZero_return_1), i8** %retaddr + br i1 undef, label %bb77, label %bb7.i + +bb7.i: ; preds = %bb35 + br label %bb2.i + +KBBlockZero_return_1: ; preds = %KBBlockZero.exit + unreachable + +KBBlockZero_return_0: ; preds = %KBBlockZero.exit + unreachable + +bb77: ; preds = %bb26, %bb12, %bb + ret void + +bb2.i: ; preds = %bb6.i350, %bb7.i + br i1 undef, label %bb6.i350, label %KBBlockZero.exit + +bb6.i350: ; preds = %bb2.i + br label %bb2.i + +KBBlockZero.exit: ; preds = %bb2.i + indirectbr i8* undef, [label %KBBlockZero_return_1, label %KBBlockZero_return_0] +} diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll index c5d00a0f8a4c..c14f5302d311 100644 --- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll +++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -91,7 +91,7 @@ entry: ; CHECK: t4 ; CHECK: vmrs APSR_nzcv, fpscr ; CHECK: if.then -; CHECK-NOT movs +; CHECK-NOT: movs %0 = load double* %q, align 4 %cmp = fcmp olt double %0, 1.000000e+01 %incdec.ptr1 = getelementptr inbounds i32* %p, i32 1 diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll index 769ba55eb9eb..fbc25b45b6ff 100644 --- a/test/CodeGen/ARM/commute-movcc.ll +++ b/test/CodeGen/ARM/commute-movcc.ll @@ -32,7 +32,7 @@ for.body: ; preds = %entry, %if.end8 %BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ] %BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ] %arrayidx = 
getelementptr inbounds i32* %a, i32 %i.012 - %0 = load i32* %arrayidx, align 4, !tbaa !0 + %0 = load i32* %arrayidx, align 4 %mul = mul i32 %0, %0 %sub = add nsw i32 %i.012, -5 %cmp2 = icmp eq i32 %sub, %Pref @@ -53,7 +53,7 @@ if.else: ; preds = %for.body if.end8: ; preds = %if.else, %if.then %BestIdx.1 = phi i32 [ %i.0.BestIdx.0, %if.then ], [ %BestIdx.0.i.0, %if.else ] %BestCost.1 = phi i32 [ %mul.BestCost.0, %if.then ], [ %BestCost.0.mul, %if.else ] - store i32 %mul, i32* %arrayidx, align 4, !tbaa !0 + store i32 %mul, i32* %arrayidx, align 4 %inc = add i32 %i.012, 1 %cmp = icmp eq i32 %inc, 11 br i1 %cmp, label %for.end, label %for.body @@ -61,7 +61,3 @@ if.end8: ; preds = %if.else, %if.then for.end: ; preds = %if.end8 ret i32 %BestIdx.1 } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/dagcombine-concatvector.ll b/test/CodeGen/ARM/dagcombine-concatvector.ll new file mode 100644 index 000000000000..e9e0fe3239a7 --- /dev/null +++ b/test/CodeGen/ARM/dagcombine-concatvector.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 | FileCheck %s + +; PR15525 +; CHECK: test1: +; CHECK: ldr.w [[REG:r[0-9]+]], [sp] +; CHECK-NEXT: vmov {{d[0-9]+}}, r1, r2 +; CHECK-NEXT: vmov {{d[0-9]+}}, r3, [[REG]] +; CHECK-NEXT: vst1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0] +; CHECK-NEXT: bx lr +define void @test1(i8* %arg, [4 x i64] %vec.coerce) { +bb: + %tmp = extractvalue [4 x i64] %vec.coerce, 0 + %tmp2 = bitcast i64 %tmp to <8 x i8> + %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %tmp4 = extractvalue [4 x i64] %vec.coerce, 1 + %tmp5 = bitcast i64 %tmp4 to <8 x i8> + %tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> %tmp3, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + tail call void @llvm.arm.neon.vst1.v16i8(i8* %arg, <16 x i8> %tmp7, i32 2) + ret void +} + +declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll index 33c8e9daae69..c162260dcd0c 100644 --- a/test/CodeGen/ARM/debug-info-arg.ll +++ b/test/CodeGen/ARM/debug-info-arg.ll @@ -31,7 +31,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !30, null, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !30, null, null, null} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll index 95e6cf2554a0..38945ac2ea7b 100644 --- 
a/test/CodeGen/ARM/debug-info-branch-folding.ll +++ b/test/CodeGen/ARM/debug-info-branch-folding.ll @@ -40,7 +40,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ] +!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null, null} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll index e3e4d068932e..e4040fa02caa 100644 --- a/test/CodeGen/ARM/debug-info-d16-reg.ll +++ b/test/CodeGen/ARM/debug-info-d16-reg.ll @@ -60,7 +60,7 @@ declare i32 @puts(i8* nocapture) nounwind !0 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"(LLVM build 00)", i1 true, metadata 
!"", i32 0, null, null, metadata !42, null, metadata !""} ; [ DW_TAG_compile_unit ] +!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, null, null, metadata !42, null, null, metadata !""} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8} !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll index 038c2296cdbe..1de6ffaeec7d 100644 --- a/test/CodeGen/ARM/debug-info-qreg.ll +++ b/test/CodeGen/ARM/debug-info-qreg.ll @@ -39,7 +39,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !0 = metadata !{i32 786478, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ] +!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null, null} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ 
DW_TAG_typedef ] diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll index f3af0b93c69c..186894232eaf 100644 --- a/test/CodeGen/ARM/debug-info-s16-reg.ll +++ b/test/CodeGen/ARM/debug-info-s16-reg.ll @@ -65,7 +65,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !0 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !47, null, null} ; [ DW_TAG_compile_unit ] +!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !47, null, null, null} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll index ae02a245b432..ba83f797e2ce 100644 --- a/test/CodeGen/ARM/debug-info-sreg2.ll +++ b/test/CodeGen/ARM/debug-info-sreg2.ll @@ -41,7 +41,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, null, null, metadata !16, null, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 
(trunk 130845)", i1 true, metadata !"", i32 0, null, null, metadata !16, null, null, null} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] diff --git a/test/CodeGen/ARM/ehabi-filters.ll b/test/CodeGen/ARM/ehabi-filters.ll index c42839d9fe3d..4c92a2975d39 100644 --- a/test/CodeGen/ARM/ehabi-filters.ll +++ b/test/CodeGen/ARM/ehabi-filters.ll @@ -19,7 +19,7 @@ define i32 @main() { entry: %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind %0 = bitcast i8* %exception.i to i32* - store i32 42, i32* %0, align 4, !tbaa !0 + store i32 42, i32* %0, align 4 invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn to label %unreachable.i unwind label %lpad.i @@ -71,7 +71,3 @@ declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone declare i8* @__cxa_begin_catch(i8*) declare void @__cxa_end_catch() - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll b/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll new file mode 100644 index 000000000000..11f3e6db0fe5 --- /dev/null +++ b/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll @@ -0,0 +1,49 @@ +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -disable-fp-elim -filetype=obj -o - %s \ +; RUN: | llvm-objdump -s - \ +; RUN: | FileCheck %s --check-prefix=CHECK + +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ +; 
RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -filetype=obj -o - %s \ +; RUN: | llvm-objdump -s - \ +; RUN: | FileCheck %s --check-prefix=CHECK-FP-ELIM + +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -disable-fp-elim -filetype=obj -o - %s \ +; RUN: | llvm-objdump -r - \ +; RUN: | FileCheck %s --check-prefix=CHECK-RELOC + +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -filetype=obj -o - %s \ +; RUN: | llvm-objdump -r - \ +; RUN: | FileCheck %s --check-prefix=CHECK-RELOC + +define void @_Z4testv() { +entry: + tail call void @_Z15throw_exceptionv() + ret void +} + +declare void @_Z15throw_exceptionv() + +; CHECK-NOT: section .ARM.extab +; CHECK: section .text +; CHECK-NOT: section .ARM.extab +; CHECK: section .ARM.exidx +; CHECK-NEXT: 0000 00000000 80849b80 +; CHECK-NOT: section .ARM.extab + +; CHECK-FP-ELIM-NOT: section .ARM.extab +; CHECK-FP-ELIM: section .text +; CHECK-FP-ELIM-NOT: section .ARM.extab +; CHECK-FP-ELIM: section .ARM.exidx +; CHECK-FP-ELIM-NEXT: 0000 00000000 b0808480 +; CHECK-FP-ELIM-NOT: section .ARM.extab + +; CHECK-RELOC: RELOCATION RECORDS FOR [.ARM.exidx] +; CHECK-RELOC-NEXT: 0 R_ARM_PREL31 .text +; CHECK-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr0 diff --git a/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll b/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll new file mode 100644 index 000000000000..79dba084c044 --- /dev/null +++ b/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll @@ -0,0 +1,62 @@ +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -disable-fp-elim -filetype=obj -o - %s \ +; RUN: | llvm-objdump -s - \ +; RUN: | FileCheck %s --check-prefix=CHECK + +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -filetype=obj -o - %s \ +; RUN: | llvm-objdump -s - \ +; RUN: | 
FileCheck %s --check-prefix=CHECK-FP-ELIM + +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -disable-fp-elim -filetype=obj -o - %s \ +; RUN: | llvm-objdump -r - \ +; RUN: | FileCheck %s --check-prefix=CHECK-RELOC + +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -filetype=obj -o - %s \ +; RUN: | llvm-objdump -r - \ +; RUN: | FileCheck %s --check-prefix=CHECK-FP-ELIM-RELOC + +define i32 @_Z3addiiiiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) { +entry: + %add = add nsw i32 %b, %a + %add1 = add nsw i32 %add, %c + %add2 = add nsw i32 %add1, %d + tail call void @_Z15throw_exceptioni(i32 %add2) + %add3 = add nsw i32 %f, %e + %add4 = add nsw i32 %add3, %g + %add5 = add nsw i32 %add4, %h + tail call void @_Z15throw_exceptioni(i32 %add5) + %add6 = add nsw i32 %add5, %add2 + ret i32 %add6 +} + +declare void @_Z15throw_exceptioni(i32) + +; CHECK-NOT: section .ARM.extab +; CHECK: section .text +; CHECK: section .ARM.extab +; CHECK-NEXT: 0000 419b0181 b0b08384 +; CHECK: section .ARM.exidx +; CHECK-NEXT: 0000 00000000 00000000 +; CHECK-NOT: section .ARM.extab + +; CHECK-FP-ELIM-NOT: section .ARM.extab +; CHECK-FP-ELIM: section .text +; CHECK-FP-ELIM-NOT: section .ARM.extab +; CHECK-FP-ELIM: section .ARM.exidx +; CHECK-FP-ELIM-NEXT: 0000 00000000 b0838480 +; CHECK-FP-ELIM-NOT: section .ARM.extab + +; CHECK-RELOC: RELOCATION RECORDS FOR [.ARM.exidx] +; CHECK-RELOC-NEXT: 0 R_ARM_PREL31 .text +; CHECK-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr1 + +; CHECK-FP-ELIM-RELOC: RELOCATION RECORDS FOR [.ARM.exidx] +; CHECK-FP-ELIM-RELOC-NEXT: 0 R_ARM_PREL31 .text +; CHECK-FP-ELIM-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr0 diff --git a/test/CodeGen/ARM/ehabi-mc-section-group.ll b/test/CodeGen/ARM/ehabi-mc-section-group.ll index 5e4b5096c494..616aa1ba46e7 100644 --- a/test/CodeGen/ARM/ehabi-mc-section-group.ll +++ 
b/test/CodeGen/ARM/ehabi-mc-section-group.ll @@ -8,7 +8,7 @@ ; RUN: llc -mtriple arm-unknown-linux-gnueabi \ ; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ ; RUN: -filetype=obj -o - %s \ -; RUN: | elf-dump --dump-section-data \ +; RUN: | llvm-readobj -s -sd \ ; RUN: | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" @@ -68,12 +68,21 @@ declare void @__cxa_end_catch() declare void @_ZSt9terminatev() -; CHECK: # Section 1 -; CHECK-NEXT: (('sh_name', 0x0000002f) # '.group' -; CHECK: ('_section_data', '01000000 0a000000 0c000000 0e000000') -; CHECK: # Section 10 -; CHECK-NEXT: (('sh_name', 0x000000e1) # '.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_' -; CHECK: # Section 12 -; CHECK-NEXT: (('sh_name', 0x000000d7) # '.ARM.extab.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_' -; CHECK: # Section 14 -; CHECK-NEXT: (('sh_name', 0x00000065) # '.ARM.exidx.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_' +; CHECK: Section { +; CHECK: Index: 1 +; CHECK-NEXT: Name: .group (47) +; CHECK: SectionData ( +; CHECK-NEXT: 0000: 01000000 09000000 0B000000 0D000000 +; CHECK-NEXT: ) + +; CHECK: Section { +; CHECK: Index: 9 +; CHECK-NEXT: Name: .text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (214) + +; CHECK: Section { +; CHECK: Index: 11 +; CHECK-NEXT: Name: .ARM.extab.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (204) + +; CHECK: Section { +; CHECK: Index: 13 +; CHECK-NEXT: Name: .ARM.exidx.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (90) diff --git a/test/CodeGen/ARM/ehabi-mc-section.ll b/test/CodeGen/ARM/ehabi-mc-section.ll index fc51b240ff3d..4e6e46829148 100644 --- a/test/CodeGen/ARM/ehabi-mc-section.ll +++ b/test/CodeGen/ARM/ehabi-mc-section.ll @@ -1,8 +1,14 @@ -; RUN: llc -mtriple arm-unknown-linux-gnueabi \ +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -disable-fp-elim -filetype=obj -o - %s \ +; 
RUN: | llvm-objdump -s - \ +; RUN: | FileCheck %s --check-prefix=CHECK + +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ ; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ ; RUN: -filetype=obj -o - %s \ ; RUN: | llvm-objdump -s - \ -; RUN: | FileCheck %s +; RUN: | FileCheck %s --check-prefix=CHECK-FP-ELIM define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) section ".test_section" { entry: @@ -54,6 +60,12 @@ declare void @_ZSt9terminatev() ; CHECK: section .test_section ; CHECK: section .ARM.extab.test_section -; CHECK-NEXT: 0000 00000000 b0b0b000 +; CHECK-NEXT: 0000 00000000 c9409b01 b0818484 ; CHECK: section .ARM.exidx.test_section ; CHECK-NEXT: 0000 00000000 00000000 + +; CHECK-FP-ELIM: section .test_section +; CHECK-FP-ELIM: section .ARM.extab.test_section +; CHECK-FP-ELIM-NEXT: 0000 00000000 84c90501 b0b0b0a8 +; CHECK-FP-ELIM: section .ARM.exidx.test_section +; CHECK-FP-ELIM-NEXT: 0000 00000000 00000000 diff --git a/test/CodeGen/ARM/ehabi-mc-sh_link.ll b/test/CodeGen/ARM/ehabi-mc-sh_link.ll index f90e5f384c1e..ac0a0fc9309a 100644 --- a/test/CodeGen/ARM/ehabi-mc-sh_link.ll +++ b/test/CodeGen/ARM/ehabi-mc-sh_link.ll @@ -7,7 +7,7 @@ ; RUN: llc -mtriple arm-unknown-linux-gnueabi \ ; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ ; RUN: -filetype=obj -o - %s \ -; RUN: | elf-dump --dump-section-data \ +; RUN: | llvm-readobj -s \ ; RUN: | FileCheck %s define void @test1() nounwind { @@ -20,28 +20,39 @@ entry: ret void } -; CHECK: # Section 1 -; CHECK-NEXT: (('sh_name', 0x00000010) # '.text' +; CHECK: Sections [ +; CHECK: Section { +; CHECK: Index: 1 +; CHECK-NEXT: Name: .text (16) -; CHECK: (('sh_name', 0x00000005) # '.ARM.exidx' -; CHECK-NEXT: ('sh_type', 0x70000001) -; CHECK-NEXT: ('sh_flags', 0x00000082) -; CHECK-NEXT: ('sh_addr', 0x00000000) -; CHECK-NEXT: ('sh_offset', 0x0000005c) -; CHECK-NEXT: ('sh_size', 0x00000008) -; CHECK-NEXT: ('sh_link', 0x00000001) -; 
CHECK-NEXT: ('sh_info', 0x00000000) -; CHECK-NEXT: ('sh_addralign', 0x00000004) +; CHECK: Section { +; CHECK: Name: .ARM.exidx (5) +; CHECK-NEXT: Type: SHT_ARM_EXIDX +; CHECK-NEXT: Flags [ (0x82) +; CHECK-NEXT: SHF_ALLOC +; CHECK-NEXT: SHF_LINK_ORDER +; CHECK-NEXT: ] +; CHECK-NEXT: Address: 0x0 +; CHECK-NEXT: Offset: 0x5C +; CHECK-NEXT: Size: 8 +; CHECK-NEXT: Link: 1 +; CHECK-NEXT: Info: 0 +; CHECK-NEXT: AddressAlignment: 4 -; CHECK: # Section 7 -; CHECK-NEXT: (('sh_name', 0x00000039) # '.test_section' +; CHECK: Section { +; CHECK: Index: 7 +; CHECK-NEXT: Name: .test_section (57) -; CHECK: (('sh_name', 0x0000002f) # '.ARM.exidx.test_section' -; CHECK-NEXT: ('sh_type', 0x70000001) -; CHECK-NEXT: ('sh_flags', 0x00000082) -; CHECK-NEXT: ('sh_addr', 0x00000000) -; CHECK-NEXT: ('sh_offset', 0x00000068) -; CHECK-NEXT: ('sh_size', 0x00000008) -; CHECK-NEXT: ('sh_link', 0x00000007) -; CHECK-NEXT: ('sh_info', 0x00000000) -; CHECK-NEXT: ('sh_addralign', 0x00000004) +; CHECK: Section { +; CHECK: Name: .ARM.exidx.test_section (47) +; CHECK-NEXT: Type: SHT_ARM_EXIDX +; CHECK-NEXT: Flags [ (0x82) +; CHECK-NEXT: SHF_ALLOC +; CHECK-NEXT: SHF_LINK_ORDER +; CHECK-NEXT: ] +; CHECK-NEXT: Address: 0x0 +; CHECK-NEXT: Offset: 0x68 +; CHECK-NEXT: Size: 8 +; CHECK-NEXT: Link: 7 +; CHECK-NEXT: Info: 0 +; CHECK-NEXT: AddressAlignment: 4 diff --git a/test/CodeGen/ARM/ehabi-mc.ll b/test/CodeGen/ARM/ehabi-mc.ll index 0dc2ef7838f0..83b8425af7c4 100644 --- a/test/CodeGen/ARM/ehabi-mc.ll +++ b/test/CodeGen/ARM/ehabi-mc.ll @@ -1,8 +1,14 @@ -; RUN: llc -mtriple arm-unknown-linux-gnueabi \ +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ +; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ +; RUN: -disable-fp-elim -filetype=obj -o - %s \ +; RUN: | llvm-objdump -s - \ +; RUN: | FileCheck %s --check-prefix=CHECK + +; RUN: llc -mtriple armv7-unknown-linux-gnueabi \ ; RUN: -arm-enable-ehabi -arm-enable-ehabi-descriptors \ ; RUN: -filetype=obj -o - %s \ ; RUN: | llvm-objdump -s - \ -; RUN: | 
FileCheck %s +; RUN: | FileCheck %s --check-prefix=CHECK-FP-ELIM define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) { entry: @@ -54,6 +60,12 @@ declare void @_ZSt9terminatev() ; CHECK: section .text ; CHECK: section .ARM.extab -; CHECK-NEXT: 0000 00000000 b0b0b000 +; CHECK-NEXT: 0000 00000000 c9409b01 b0818484 ; CHECK: section .ARM.exidx ; CHECK-NEXT: 0000 00000000 00000000 + +; CHECK-FP-ELIM: section .text +; CHECK-FP-ELIM: section .ARM.extab +; CHECK-FP-ELIM-NEXT: 0000 00000000 84c90501 b0b0b0a8 +; CHECK-FP-ELIM: section .ARM.exidx +; CHECK-FP-ELIM-NEXT: 0000 00000000 00000000 diff --git a/test/CodeGen/ARM/gpr-paired-spill-thumbinst.ll b/test/CodeGen/ARM/gpr-paired-spill-thumbinst.ll new file mode 100644 index 000000000000..00027119f9e0 --- /dev/null +++ b/test/CodeGen/ARM/gpr-paired-spill-thumbinst.ll @@ -0,0 +1,30 @@ +; REQUIRES: asserts +; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -debug -o /dev/null < %s 2>&1 | FileCheck %s + +; This test makes sure spills of 64-bit pairs in Thumb mode actually +; generate thumb instructions. Previously we were inserting an ARM +; STMIA which happened to have the same encoding. + +define void @foo(i64* %addr) { + %val1 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val2 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val3 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val4 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val5 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val6 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val7 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + + ; Make sure we are actually creating the Thumb versions of the spill + ; instructions. 
+; CHECK: t2STRDi8 +; CHECK: t2LDRDi8 + + store volatile i64 %val1, i64* %addr + store volatile i64 %val2, i64* %addr + store volatile i64 %val3, i64* %addr + store volatile i64 %val4, i64* %addr + store volatile i64 %val5, i64* %addr + store volatile i64 %val6, i64* %addr + store volatile i64 %val7, i64* %addr + ret void +} diff --git a/test/CodeGen/ARM/gpr-paired-spill.ll b/test/CodeGen/ARM/gpr-paired-spill.ll new file mode 100644 index 000000000000..ef3e5a54a2db --- /dev/null +++ b/test/CodeGen/ARM/gpr-paired-spill.ll @@ -0,0 +1,44 @@ +; RUN: llc -mtriple=armv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD +; RUN: llc -mtriple=armv4-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITHOUT-LDRD +; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD + +define void @foo(i64* %addr) { + %val1 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val2 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val3 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val4 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val5 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val6 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + %val7 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr) + + ; Key point is that enough 64-bit paired GPR values are live that + ; one of them has to be spilled. This used to cause an abort because + ; an LDMIA was created with both a FrameIndex and an offset, which + ; is not allowed. 
+ +; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] +; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp] + +; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8] +; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp] + + ; We also want to ensure the register scavenger is working (i.e. an + ; offset from sp can be generated), so we need two spills. +; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}} +; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}} + + ; In principle LLVM may have to recalculate the offset. At the moment + ; it reuses the original though. +; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}} +; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}} + + store volatile i64 %val1, i64* %addr + store volatile i64 %val2, i64* %addr + store volatile i64 %val3, i64* %addr + store volatile i64 %val4, i64* %addr + store volatile i64 %val5, i64* %addr + store volatile i64 %val6, i64* %addr + store volatile i64 %val7, i64* %addr + ret void +} diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll index 5b4cf9d81606..9b0f3e54e88a 100644 --- a/test/CodeGen/ARM/lsr-unfolded-offset.ll +++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll @@ -26,8 +26,8 @@ outer.loop: ; preds = %for.inc69, %entry %0 = phi i32 [ %inc71, %for.inc69 ], [ 0, %entry ] %offset = getelementptr %struct.partition_entry* %part, i32 %0, i32 2 %len = getelementptr %struct.partition_entry* %part, i32 %0, i32 3 - %tmp5 = load i64* %offset, align 4, !tbaa !0 - %tmp15 = load i64* %len, align 4, !tbaa !0 + %tmp5 = load i64* %offset, align 4 + %tmp15 = load i64* %len, align 4 %add = add nsw i64 %tmp15, %tmp5 br label %inner.loop @@ -40,8 +40,8 @@ inner.loop: ; preds = %for.inc, %outer.loo if.end: ; preds = %inner.loop %len39 = getelementptr %struct.partition_entry* %part, i32 %1, i32 3 %offset28 = getelementptr %struct.partition_entry* %part, i32 
%1, i32 2 - %tmp29 = load i64* %offset28, align 4, !tbaa !0 - %tmp40 = load i64* %len39, align 4, !tbaa !0 + %tmp29 = load i64* %offset28, align 4 + %tmp40 = load i64* %len39, align 4 %add41 = add nsw i64 %tmp40, %tmp29 %cmp44 = icmp sge i64 %tmp29, %tmp5 %cmp47 = icmp slt i64 %tmp29, %add @@ -74,7 +74,3 @@ for.end72: ; preds = %for.inc69, %entry %overlap.0.lcssa = phi i32 [ 0, %entry ], [ %overlap.4, %for.inc69 ] ret i32 %overlap.0.lcssa } - -!0 = metadata !{metadata !"long long", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/misched-copy-arm.ll b/test/CodeGen/ARM/misched-copy-arm.ll new file mode 100644 index 000000000000..4b15326008a4 --- /dev/null +++ b/test/CodeGen/ARM/misched-copy-arm.ll @@ -0,0 +1,30 @@ +; REQUIRES: asserts +; RUN: llc < %s -march=thumb -mcpu=swift -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s +; +; Loop counter copies should be eliminated. +; There is also a MUL here, but we don't care where it is scheduled. 
+; CHECK: postinc +; CHECK: *** Final schedule for BB#2 *** +; CHECK: t2LDRs +; CHECK: t2ADDrr +; CHECK: t2CMPrr +; CHECK: COPY +define i32 @postinc(i32 %a, i32* nocapture %d, i32 %s) nounwind { +entry: + %cmp4 = icmp eq i32 %a, 0 + br i1 %cmp4, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %s.05 = phi i32 [ %mul, %for.body ], [ 0, %entry ] + %indvars.iv.next = add i32 %indvars.iv, %s + %arrayidx = getelementptr inbounds i32* %d, i32 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %mul = mul nsw i32 %0, %s.05 + %exitcond = icmp eq i32 %indvars.iv.next, %a + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %s.0.lcssa = phi i32 [ 0, %entry ], [ %mul, %for.body ] + ret i32 %s.0.lcssa +} diff --git a/test/CodeGen/ARM/neon_vabs.ll b/test/CodeGen/ARM/neon_vabs.ll new file mode 100644 index 000000000000..bf2770b15b01 --- /dev/null +++ b/test/CodeGen/ARM/neon_vabs.ll @@ -0,0 +1,91 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <4 x i32> @test1(<4 x i32> %a) nounwind { +; CHECK: test1: +; CHECK: vabs.s32 q + %tmp1neg = sub <4 x i32> zeroinitializer, %a + %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> + %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg + ret <4 x i32> %abs +} + +define <4 x i32> @test2(<4 x i32> %a) nounwind { +; CHECK: test2: +; CHECK: vabs.s32 q + %tmp1neg = sub <4 x i32> zeroinitializer, %a + %b = icmp sge <4 x i32> %a, zeroinitializer + %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg + ret <4 x i32> %abs +} + +define <8 x i16> @test3(<8 x i16> %a) nounwind { +; CHECK: test3: +; CHECK: vabs.s16 q + %tmp1neg = sub <8 x i16> zeroinitializer, %a + %b = icmp sgt <8 x i16> %a, zeroinitializer + %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg + ret <8 x i16> %abs +} + +define <16 x i8> @test4(<16 x i8> %a) nounwind { +; CHECK: test4: +; CHECK: vabs.s8 
q + %tmp1neg = sub <16 x i8> zeroinitializer, %a + %b = icmp slt <16 x i8> %a, zeroinitializer + %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a + ret <16 x i8> %abs +} + +define <4 x i32> @test5(<4 x i32> %a) nounwind { +; CHECK: test5: +; CHECK: vabs.s32 q + %tmp1neg = sub <4 x i32> zeroinitializer, %a + %b = icmp sle <4 x i32> %a, zeroinitializer + %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a + ret <4 x i32> %abs +} + +define <2 x i32> @test6(<2 x i32> %a) nounwind { +; CHECK: test6: +; CHECK: vabs.s32 d + %tmp1neg = sub <2 x i32> zeroinitializer, %a + %b = icmp sgt <2 x i32> %a, <i32 -1, i32 -1> + %abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg + ret <2 x i32> %abs +} + +define <2 x i32> @test7(<2 x i32> %a) nounwind { +; CHECK: test7: +; CHECK: vabs.s32 d + %tmp1neg = sub <2 x i32> zeroinitializer, %a + %b = icmp sge <2 x i32> %a, zeroinitializer + %abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg + ret <2 x i32> %abs +} + +define <4 x i16> @test8(<4 x i16> %a) nounwind { +; CHECK: test8: +; CHECK: vabs.s16 d + %tmp1neg = sub <4 x i16> zeroinitializer, %a + %b = icmp sgt <4 x i16> %a, zeroinitializer + %abs = select <4 x i1> %b, <4 x i16> %a, <4 x i16> %tmp1neg + ret <4 x i16> %abs +} + +define <8 x i8> @test9(<8 x i8> %a) nounwind { +; CHECK: test9: +; CHECK: vabs.s8 d + %tmp1neg = sub <8 x i8> zeroinitializer, %a + %b = icmp slt <8 x i8> %a, zeroinitializer + %abs = select <8 x i1> %b, <8 x i8> %tmp1neg, <8 x i8> %a + ret <8 x i8> %abs +} + +define <2 x i32> @test10(<2 x i32> %a) nounwind { +; CHECK: test10: +; CHECK: vabs.s32 d + %tmp1neg = sub <2 x i32> zeroinitializer, %a + %b = icmp sle <2 x i32> %a, zeroinitializer + %abs = select <2 x i1> %b, <2 x i32> %tmp1neg, <2 x i32> %a + ret <2 x i32> %abs +} diff --git a/test/CodeGen/ARM/nop_concat_vectors.ll b/test/CodeGen/ARM/nop_concat_vectors.ll new file mode 100644 index 000000000000..c81090095a99 --- /dev/null +++ b/test/CodeGen/ARM/nop_concat_vectors.ll @@ 
-0,0 +1,13 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s + +;CHECK: _foo +;CHECK-NOT: vld1.32 +;CHECK-NOT: vst1.32 +;CHECK: bx +define void @foo(<16 x i8>* %J) { + %A = load <16 x i8>* %J + %T1 = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %T2 = shufflevector <8 x i8> %T1, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + store <16 x i8> %T2, <16 x i8>* %J + ret void +} diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll index f93ffe7b339a..94578d82fddc 100644 --- a/test/CodeGen/ARM/private.ll +++ b/test/CodeGen/ARM/private.ll @@ -1,10 +1,11 @@ ; Test to make sure that the 'private' is used correctly. ; -; RUN: llc < %s -mtriple=arm-linux-gnueabi > %t -; RUN: grep .Lfoo: %t -; RUN: egrep bl.*\.Lfoo %t -; RUN: grep .Lbaz: %t -; RUN: grep long.*\.Lbaz %t +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s +; CHECK: .Lfoo: +; CHECK: bar: +; CHECK: bl .Lfoo +; CHECK: .long .Lbaz +; CHECK: .Lbaz: define private void @foo() { ret void diff --git a/test/CodeGen/ARM/returned-ext.ll b/test/CodeGen/ARM/returned-ext.ll new file mode 100644 index 000000000000..670b12f249d4 --- /dev/null +++ b/test/CodeGen/ARM/returned-ext.ll @@ -0,0 +1,178 @@ +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D + +declare i16 @identity16(i16 returned %x) +declare i32 @identity32(i32 returned %x) +declare zeroext i16 @retzext16(i16 returned %x) +declare i16 @paramzext16(i16 zeroext returned %x) +declare zeroext i16 @bothzext16(i16 zeroext returned %x) + +; The zeroext param attribute below is meant to have no effect +define i16 @test_identity(i16 zeroext %x) { +entry: +; CHECKELF: test_identity: +; CHECKELF: mov 
[[SAVEX:r[0-9]+]], r0 +; CHECKELF: bl identity16 +; CHECKELF: uxth r0, r0 +; CHECKELF: bl identity32 +; CHECKELF: mov r0, [[SAVEX]] +; CHECKT2D: test_identity: +; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0 +; CHECKT2D: blx _identity16 +; CHECKT2D: uxth r0, r0 +; CHECKT2D: blx _identity32 +; CHECKT2D: mov r0, [[SAVEX]] + %call = tail call i16 @identity16(i16 %x) + %b = zext i16 %call to i32 + %call2 = tail call i32 @identity32(i32 %b) + ret i16 %x +} + +; FIXME: This ought not to require register saving but currently does because +; x is not considered equal to %call (see SelectionDAGBuilder.cpp) +define i16 @test_matched_ret(i16 %x) { +entry: +; CHECKELF: test_matched_ret: + +; This shouldn't be required +; CHECKELF: mov [[SAVEX:r[0-9]+]], r0 + +; CHECKELF: bl retzext16 +; CHECKELF-NOT: uxth r0, {{r[0-9]+}} +; CHECKELF: bl identity32 + +; This shouldn't be required +; CHECKELF: mov r0, [[SAVEX]] + +; CHECKT2D: test_matched_ret: + +; This shouldn't be required +; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0 + +; CHECKT2D: blx _retzext16 +; CHECKT2D-NOT: uxth r0, {{r[0-9]+}} +; CHECKT2D: blx _identity32 + +; This shouldn't be required +; CHECKT2D: mov r0, [[SAVEX]] + + %call = tail call i16 @retzext16(i16 %x) + %b = zext i16 %call to i32 + %call2 = tail call i32 @identity32(i32 %b) + ret i16 %x +} + +define i16 @test_mismatched_ret(i16 %x) { +entry: +; CHECKELF: test_mismatched_ret: +; CHECKELF: mov [[SAVEX:r[0-9]+]], r0 +; CHECKELF: bl retzext16 +; CHECKELF: sxth r0, {{r[0-9]+}} +; CHECKELF: bl identity32 +; CHECKELF: mov r0, [[SAVEX]] +; CHECKT2D: test_mismatched_ret: +; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0 +; CHECKT2D: blx _retzext16 +; CHECKT2D: sxth r0, {{r[0-9]+}} +; CHECKT2D: blx _identity32 +; CHECKT2D: mov r0, [[SAVEX]] + %call = tail call i16 @retzext16(i16 %x) + %b = sext i16 %call to i32 + %call2 = tail call i32 @identity32(i32 %b) + ret i16 %x +} + +define i16 @test_matched_paramext(i16 %x) { +entry: +; CHECKELF: test_matched_paramext: +; CHECKELF: uxth r0, r0 +; 
CHECKELF: bl paramzext16 +; CHECKELF: uxth r0, r0 +; CHECKELF: bl identity32 +; CHECKELF: b paramzext16 +; CHECKT2D: test_matched_paramext: +; CHECKT2D: uxth r0, r0 +; CHECKT2D: blx _paramzext16 +; CHECKT2D: uxth r0, r0 +; CHECKT2D: blx _identity32 +; CHECKT2D: b.w _paramzext16 + %call = tail call i16 @paramzext16(i16 %x) + %b = zext i16 %call to i32 + %call2 = tail call i32 @identity32(i32 %b) + %call3 = tail call i16 @paramzext16(i16 %call) + ret i16 %call3 +} + +; FIXME: This theoretically ought to optimize to exact same output as the +; version above, but doesn't currently (see SelectionDAGBuilder.cpp) +define i16 @test_matched_paramext2(i16 %x) { +entry: + +; Since there doesn't seem to be an unambiguous optimal selection and +; scheduling of uxth and mov instructions below in lieu of the 'returned' +; optimization, don't bother checking: just verify that the calls are made +; in the correct order as a basic sanity check + +; CHECKELF: test_matched_paramext2: +; CHECKELF: bl paramzext16 +; CHECKELF: bl identity32 +; CHECKELF: b paramzext16 +; CHECKT2D: test_matched_paramext2: +; CHECKT2D: blx _paramzext16 +; CHECKT2D: blx _identity32 +; CHECKT2D: b.w _paramzext16 + %call = tail call i16 @paramzext16(i16 %x) + +; Should make no difference if %x is used below rather than %call, but it does + %b = zext i16 %x to i32 + + %call2 = tail call i32 @identity32(i32 %b) + %call3 = tail call i16 @paramzext16(i16 %call) + ret i16 %call3 +} + +define i16 @test_matched_bothext(i16 %x) { +entry: +; CHECKELF: test_matched_bothext: +; CHECKELF: uxth r0, r0 +; CHECKELF: bl bothzext16 +; CHECKELF-NOT: uxth r0, r0 + +; FIXME: Tail call should be OK here +; CHECKELF: bl identity32 + +; CHECKT2D: test_matched_bothext: +; CHECKT2D: uxth r0, r0 +; CHECKT2D: blx _bothzext16 +; CHECKT2D-NOT: uxth r0, r0 + +; FIXME: Tail call should be OK here +; CHECKT2D: blx _identity32 + + %call = tail call i16 @bothzext16(i16 %x) + %b = zext i16 %x to i32 + %call2 = tail call i32 @identity32(i32 %b) 
+ ret i16 %call +} + +define i16 @test_mismatched_bothext(i16 %x) { +entry: +; CHECKELF: test_mismatched_bothext: +; CHECKELF: mov [[SAVEX:r[0-9]+]], r0 +; CHECKELF: uxth r0, {{r[0-9]+}} +; CHECKELF: bl bothzext16 +; CHECKELF: sxth r0, [[SAVEX]] +; CHECKELF: bl identity32 +; CHECKELF: mov r0, [[SAVEX]] +; CHECKT2D: test_mismatched_bothext: +; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0 +; CHECKT2D: uxth r0, {{r[0-9]+}} +; CHECKT2D: blx _bothzext16 +; CHECKT2D: sxth r0, [[SAVEX]] +; CHECKT2D: blx _identity32 +; CHECKT2D: mov r0, [[SAVEX]] + %call = tail call i16 @bothzext16(i16 %x) + %b = sext i16 %x to i32 + %call2 = tail call i32 @identity32(i32 %b) + ret i16 %x +} diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll index e015bf098ff8..eb4d0bab929e 100644 --- a/test/CodeGen/ARM/tail-dup.ll +++ b/test/CodeGen/ARM/tail-dup.ll @@ -11,19 +11,19 @@ define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp { entry: - %0 = load i32* %opcodes, align 4, !tbaa !0 + %0 = load i32* %opcodes, align 4 %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0 br label %indirectgoto INCREMENT: ; preds = %indirectgoto %inc = add nsw i32 %result.0, 1 - %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0 + %1 = load i32* %opcodes.addr.0, align 4 %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1 br label %indirectgoto DECREMENT: ; preds = %indirectgoto %dec = add nsw i32 %result.0, -1 - %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0 + %2 = load i32* %opcodes.addr.0, align 4 %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2 br label %indirectgoto @@ -38,7 +38,3 @@ indirectgoto: ; preds = %DECREMENT, %INCREME RETURN: ; preds = %indirectgoto ret i32 %result.0 } - -!0 = metadata !{metadata !"int", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/this-return.ll b/test/CodeGen/ARM/this-return.ll new 
file mode 100644 index 000000000000..f06e4a4f8ddc --- /dev/null +++ b/test/CodeGen/ARM/this-return.ll @@ -0,0 +1,105 @@ +; RUN: llc < %s -mtriple=armv6-linux-gnueabi -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF +; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D + +%struct.A = type { i8 } +%struct.B = type { i32 } +%struct.C = type { %struct.B } +%struct.D = type { %struct.B } +%struct.E = type { %struct.B, %struct.B } + +declare %struct.A* @A_ctor_base(%struct.A* returned) +declare %struct.B* @B_ctor_base(%struct.B* returned, i32) +declare %struct.B* @B_ctor_complete(%struct.B* returned, i32) + +declare %struct.A* @A_ctor_base_nothisret(%struct.A*) +declare %struct.B* @B_ctor_base_nothisret(%struct.B*, i32) +declare %struct.B* @B_ctor_complete_nothisret(%struct.B*, i32) + +define %struct.C* @C_ctor_base(%struct.C* returned %this, i32 %x) { +entry: +; CHECKELF: C_ctor_base: +; CHECKELF-NOT: mov {{r[0-9]+}}, r0 +; CHECKELF: bl A_ctor_base +; CHECKELF-NOT: mov r0, {{r[0-9]+}} +; CHECKELF: b B_ctor_base +; CHECKT2D: C_ctor_base: +; CHECKT2D-NOT: mov {{r[0-9]+}}, r0 +; CHECKT2D: blx _A_ctor_base +; CHECKT2D-NOT: mov r0, {{r[0-9]+}} +; CHECKT2D: b.w _B_ctor_base + %0 = bitcast %struct.C* %this to %struct.A* + %call = tail call %struct.A* @A_ctor_base(%struct.A* %0) + %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0 + %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x) + ret %struct.C* %this +} + +define %struct.C* @C_ctor_base_nothisret(%struct.C* %this, i32 %x) { +entry: +; CHECKELF: C_ctor_base_nothisret: +; CHECKELF: mov [[SAVETHIS:r[0-9]+]], r0 +; CHECKELF: bl A_ctor_base_nothisret +; CHECKELF: mov r0, [[SAVETHIS]] +; CHECKELF-NOT: b B_ctor_base_nothisret +; CHECKT2D: C_ctor_base_nothisret: +; CHECKT2D: mov [[SAVETHIS:r[0-9]+]], r0 +; CHECKT2D: blx _A_ctor_base_nothisret +; CHECKT2D: mov r0, [[SAVETHIS]] +; CHECKT2D-NOT: b.w _B_ctor_base_nothisret + %0 = bitcast %struct.C* %this to 
%struct.A* + %call = tail call %struct.A* @A_ctor_base_nothisret(%struct.A* %0) + %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0 + %call2 = tail call %struct.B* @B_ctor_base_nothisret(%struct.B* %1, i32 %x) + ret %struct.C* %this +} + +define %struct.C* @C_ctor_complete(%struct.C* %this, i32 %x) { +entry: +; CHECKELF: C_ctor_complete: +; CHECKELF: b C_ctor_base +; CHECKT2D: C_ctor_complete: +; CHECKT2D: b.w _C_ctor_base + %call = tail call %struct.C* @C_ctor_base(%struct.C* %this, i32 %x) + ret %struct.C* %this +} + +define %struct.C* @C_ctor_complete_nothisret(%struct.C* %this, i32 %x) { +entry: +; CHECKELF: C_ctor_complete_nothisret: +; CHECKELF-NOT: b C_ctor_base_nothisret +; CHECKT2D: C_ctor_complete_nothisret: +; CHECKT2D-NOT: b.w _C_ctor_base_nothisret + %call = tail call %struct.C* @C_ctor_base_nothisret(%struct.C* %this, i32 %x) + ret %struct.C* %this +} + +define %struct.D* @D_ctor_base(%struct.D* %this, i32 %x) { +entry: +; CHECKELF: D_ctor_base: +; CHECKELF-NOT: mov {{r[0-9]+}}, r0 +; CHECKELF: bl B_ctor_complete +; CHECKELF-NOT: mov r0, {{r[0-9]+}} +; CHECKELF: b B_ctor_complete +; CHECKT2D: D_ctor_base: +; CHECKT2D-NOT: mov {{r[0-9]+}}, r0 +; CHECKT2D: blx _B_ctor_complete +; CHECKT2D-NOT: mov r0, {{r[0-9]+}} +; CHECKT2D: b.w _B_ctor_complete + %b = getelementptr inbounds %struct.D* %this, i32 0, i32 0 + %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x) + %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x) + ret %struct.D* %this +} + +define %struct.E* @E_ctor_base(%struct.E* %this, i32 %x) { +entry: +; CHECKELF: E_ctor_base: +; CHECKELF-NOT: b B_ctor_complete +; CHECKT2D: E_ctor_base: +; CHECKT2D-NOT: b.w _B_ctor_complete + %b = getelementptr inbounds %struct.E* %this, i32 0, i32 0 + %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x) + %b2 = getelementptr inbounds %struct.E* %this, i32 0, i32 1 + %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b2, i32 %x) + ret %struct.E* 
%this +} diff --git a/test/CodeGen/ARM/v1-constant-fold.ll b/test/CodeGen/ARM/v1-constant-fold.ll new file mode 100644 index 000000000000..b86d5db29c4b --- /dev/null +++ b/test/CodeGen/ARM/v1-constant-fold.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+v7,+vfp3,-neon | FileCheck %s + +; PR15611. Check that we don't crash when constant folding v1i32 types. + +; CHECK: foo: +define void @foo(i32 %arg) { +bb: + %tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 1 + %tmp2 = insertelement <4 x i32> %tmp1, i32 0, i32 2 + %tmp3 = insertelement <4 x i32> %tmp2, i32 0, i32 3 + %tmp4 = add <4 x i32> %tmp3, <i32 -1, i32 -1, i32 -1, i32 -1> +; CHECK: bl bar + tail call void @bar(<4 x i32> %tmp4) + ret void +} + +declare void @bar(<4 x i32>) diff --git a/test/CodeGen/ARM/vcvt-cost.ll b/test/CodeGen/ARM/vcvt-cost.ll new file mode 100644 index 000000000000..0d45c40b8814 --- /dev/null +++ b/test/CodeGen/ARM/vcvt-cost.ll @@ -0,0 +1,153 @@ +; We currently estimate the cost of sext/zext/trunc v8(v16)i32 <-> v8(v16)i8 +; instructions as expensive. If lowering is improved the cost model needs to +; change. +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST +%T0_5 = type <8 x i8> +%T1_5 = type <8 x i32> +; CHECK: func_cvt5: +define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) { +; CHECK: vmovl.s8 +; CHECK: vmovl.s16 +; CHECK: vmovl.s16 + %v0 = load %T0_5* %loadaddr +; COST: func_cvt5 +; COST: cost of 3 {{.*}} sext + %r = sext %T0_5 %v0 to %T1_5 + store %T1_5 %r, %T1_5* %storeaddr + ret void +} +;; We currently estimate the cost of this instruction as expensive. If lowering +;; is improved the cost needs to change. 
+%TA0_5 = type <8 x i8> +%TA1_5 = type <8 x i32> +; CHECK: func_cvt1: +define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) { +; CHECK: vmovl.u8 +; CHECK: vmovl.u16 +; CHECK: vmovl.u16 + %v0 = load %TA0_5* %loadaddr +; COST: func_cvt1 +; COST: cost of 3 {{.*}} zext + %r = zext %TA0_5 %v0 to %TA1_5 + store %TA1_5 %r, %TA1_5* %storeaddr + ret void +} + +%T0_51 = type <8 x i32> +%T1_51 = type <8 x i8> +; CHECK: func_cvt51: +define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) { +; CHECK: vmovn.i32 +; CHECK: vmovn.i32 +; CHECK: vmovn.i16 + %v0 = load %T0_51* %loadaddr +; COST: func_cvt51 +; COST: cost of 3 {{.*}} trunc + %r = trunc %T0_51 %v0 to %T1_51 + store %T1_51 %r, %T1_51* %storeaddr + ret void +} + +%TT0_5 = type <16 x i8> +%TT1_5 = type <16 x i32> +; CHECK: func_cvt52: +define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) { +; CHECK: vmovl.s16 +; CHECK: vmovl.s16 +; CHECK: vmovl.s16 +; CHECK: vmovl.s16 + %v0 = load %TT0_5* %loadaddr +; COST: func_cvt52 +; COST: cost of 6 {{.*}} sext + %r = sext %TT0_5 %v0 to %TT1_5 + store %TT1_5 %r, %TT1_5* %storeaddr + ret void +} +;; We currently estimate the cost of this instruction as expensive. If lowering +;; is improved the cost needs to change. 
+%TTA0_5 = type <16 x i8> +%TTA1_5 = type <16 x i32> +; CHECK: func_cvt12: +define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) { +; CHECK: vmovl.u16 +; CHECK: vmovl.u16 +; CHECK: vmovl.u16 +; CHECK: vmovl.u16 + %v0 = load %TTA0_5* %loadaddr +; COST: func_cvt12 +; COST: cost of 6 {{.*}} zext + %r = zext %TTA0_5 %v0 to %TTA1_5 + store %TTA1_5 %r, %TTA1_5* %storeaddr + ret void +} + +%TT0_51 = type <16 x i32> +%TT1_51 = type <16 x i8> +; CHECK: func_cvt512: +define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) { +; CHECK: vmovn.i32 +; CHECK: vmovn.i32 +; CHECK: vmovn.i32 +; CHECK: vmovn.i32 +; CHECK: vmovn.i16 +; CHECK: vmovn.i16 + %v0 = load %TT0_51* %loadaddr +; COST: func_cvt512 +; COST: cost of 6 {{.*}} trunc + %r = trunc %TT0_51 %v0 to %TT1_51 + store %TT1_51 %r, %TT1_51* %storeaddr + ret void +} + +; CHECK: sext_v4i16_v4i64: +define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) { +; CHECK: vmovl.s32 +; CHECK: vmovl.s32 + %v0 = load <4 x i16>* %loadaddr +; COST: sext_v4i16_v4i64 +; COST: cost of 3 {{.*}} sext + %r = sext <4 x i16> %v0 to <4 x i64> + store <4 x i64> %r, <4 x i64>* %storeaddr + ret void +} + +; CHECK: zext_v4i16_v4i64: +define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) { +; CHECK: vmovl.u32 +; CHECK: vmovl.u32 + %v0 = load <4 x i16>* %loadaddr +; COST: zext_v4i16_v4i64 +; COST: cost of 3 {{.*}} zext + %r = zext <4 x i16> %v0 to <4 x i64> + store <4 x i64> %r, <4 x i64>* %storeaddr + ret void +} + +; CHECK: sext_v8i16_v8i64: +define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) { +; CHECK: vmovl.s32 +; CHECK: vmovl.s32 +; CHECK: vmovl.s32 +; CHECK: vmovl.s32 + %v0 = load <8 x i16>* %loadaddr +; COST: sext_v8i16_v8i64 +; COST: cost of 6 {{.*}} sext + %r = sext <8 x i16> %v0 to <8 x i64> + store <8 x i64> %r, <8 x i64>* %storeaddr + ret void +} + +; CHECK: zext_v8i16_v8i64: +define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) { +; 
CHECK: vmovl.u32 +; CHECK: vmovl.u32 +; CHECK: vmovl.u32 +; CHECK: vmovl.u32 + %v0 = load <8 x i16>* %loadaddr +; COST: zext_v8i16_v8i64 +; COST: cost of 6 {{.*}} zext + %r = zext <8 x i16> %v0 to <8 x i64> + store <8 x i64> %r, <8 x i64>* %storeaddr + ret void +} + diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll index e67b4788a37d..c078f493094b 100644 --- a/test/CodeGen/ARM/vcvt.ll +++ b/test/CodeGen/ARM/vcvt.ll @@ -156,175 +156,3 @@ define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind { declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone - -; We currently estimate the cost of sext/zext/trunc v8(v16)i32 <-> v8(v16)i8 -; instructions as expensive. If lowering is improved the cost model needs to -; change. -; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST -%T0_5 = type <8 x i8> -%T1_5 = type <8 x i32> -; CHECK: func_cvt5: -define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) { -; CHECK: vmovl.s8 -; CHECK: vmovl.s16 -; CHECK: vmovl.s16 - %v0 = load %T0_5* %loadaddr -; COST: func_cvt5 -; COST: cost of 3 {{.*}} sext - %r = sext %T0_5 %v0 to %T1_5 - store %T1_5 %r, %T1_5* %storeaddr - ret void -} -;; We currently estimate the cost of this instruction as expensive. If lowering -;; is improved the cost needs to change. -%TA0_5 = type <8 x i8> -%TA1_5 = type <8 x i32> -; CHECK: func_cvt1: -define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) { -; CHECK: vmovl.u8 -; CHECK: vmovl.u16 -; CHECK: vmovl.u16 - %v0 = load %TA0_5* %loadaddr -; COST: func_cvt1 -; COST: cost of 3 {{.*}} zext - %r = zext %TA0_5 %v0 to %TA1_5 - store %TA1_5 %r, %TA1_5* %storeaddr - ret void -} -;; We currently estimate the cost of this instruction as expensive. If lowering -;; is improved the cost needs to change. 
-%T0_51 = type <8 x i32> -%T1_51 = type <8 x i8> -; CHECK: func_cvt51: -define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) { -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb - %v0 = load %T0_51* %loadaddr -; COST: func_cvt51 -; COST: cost of 19 {{.*}} trunc - %r = trunc %T0_51 %v0 to %T1_51 - store %T1_51 %r, %T1_51* %storeaddr - ret void -} -;; We currently estimate the cost of this instruction as expensive. If lowering -;; is improved the cost needs to change. -%TT0_5 = type <16 x i8> -%TT1_5 = type <16 x i32> -; CHECK: func_cvt52: -define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) { -; CHECK: vmovl.s16 -; CHECK: vmovl.s16 -; CHECK: vmovl.s16 -; CHECK: vmovl.s16 - %v0 = load %TT0_5* %loadaddr -; COST: func_cvt52 -; COST: cost of 6 {{.*}} sext - %r = sext %TT0_5 %v0 to %TT1_5 - store %TT1_5 %r, %TT1_5* %storeaddr - ret void -} -;; We currently estimate the cost of this instruction as expensive. If lowering -;; is improved the cost needs to change. -%TTA0_5 = type <16 x i8> -%TTA1_5 = type <16 x i32> -; CHECK: func_cvt12: -define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) { -; CHECK: vmovl.u16 -; CHECK: vmovl.u16 -; CHECK: vmovl.u16 -; CHECK: vmovl.u16 - %v0 = load %TTA0_5* %loadaddr -; COST: func_cvt12 -; COST: cost of 6 {{.*}} zext - %r = zext %TTA0_5 %v0 to %TTA1_5 - store %TTA1_5 %r, %TTA1_5* %storeaddr - ret void -} -;; We currently estimate the cost of this instruction as expensive. If lowering -;; is improved the cost needs to change. 
-%TT0_51 = type <16 x i32> -%TT1_51 = type <16 x i8> -; CHECK: func_cvt512: -define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) { -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb -; CHECK: strb - %v0 = load %TT0_51* %loadaddr -; COST: func_cvt512 -; COST: cost of 38 {{.*}} trunc - %r = trunc %TT0_51 %v0 to %TT1_51 - store %TT1_51 %r, %TT1_51* %storeaddr - ret void -} - -; CHECK: sext_v4i16_v4i64: -define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) { -; CHECK: vmovl.s32 -; CHECK: vmovl.s32 - %v0 = load <4 x i16>* %loadaddr -; COST: sext_v4i16_v4i64 -; COST: cost of 3 {{.*}} sext - %r = sext <4 x i16> %v0 to <4 x i64> - store <4 x i64> %r, <4 x i64>* %storeaddr - ret void -} - -; CHECK: zext_v4i16_v4i64: -define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) { -; CHECK: vmovl.u32 -; CHECK: vmovl.u32 - %v0 = load <4 x i16>* %loadaddr -; COST: zext_v4i16_v4i64 -; COST: cost of 3 {{.*}} zext - %r = zext <4 x i16> %v0 to <4 x i64> - store <4 x i64> %r, <4 x i64>* %storeaddr - ret void -} - -; CHECK: sext_v8i16_v8i64: -define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) { -; CHECK: vmovl.s32 -; CHECK: vmovl.s32 -; CHECK: vmovl.s32 -; CHECK: vmovl.s32 - %v0 = load <8 x i16>* %loadaddr -; COST: sext_v8i16_v8i64 -; COST: cost of 6 {{.*}} sext - %r = sext <8 x i16> %v0 to <8 x i64> - store <8 x i64> %r, <8 x i64>* %storeaddr - ret void -} - -; CHECK: zext_v8i16_v8i64: -define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) { -; CHECK: vmovl.u32 -; CHECK: vmovl.u32 -; CHECK: vmovl.u32 -; CHECK: vmovl.u32 - %v0 = load <8 x i16>* %loadaddr -; COST: zext_v8i16_v8i64 -; COST: cost of 6 {{.*}} zext - %r = zext <8 x i16> %v0 to <8 x i64> - store <8 x i64> %r, <8 x i64>* %storeaddr - ret void -} - diff --git 
a/test/CodeGen/ARM/vcvt_combine.ll b/test/CodeGen/ARM/vcvt_combine.ll index 3009e50c532b..07ba230757be 100644 --- a/test/CodeGen/ARM/vcvt_combine.ll +++ b/test/CodeGen/ARM/vcvt_combine.ll @@ -7,7 +7,7 @@ ; CHECK-NOT: vmul define void @t0() nounwind { entry: - %tmp = load float* @in, align 4, !tbaa !0 + %tmp = load float* @in, align 4 %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00> @@ -23,7 +23,7 @@ declare void @foo_int32x2_t(<2 x i32>) ; CHECK-NOT: vmul define void @t1() nounwind { entry: - %tmp = load float* @in, align 4, !tbaa !0 + %tmp = load float* @in, align 4 %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00> @@ -39,7 +39,7 @@ declare void @foo_uint32x2_t(<2 x i32>) ; CHECK: vmul define void @t2() nounwind { entry: - %tmp = load float* @in, align 4, !tbaa !0 + %tmp = load float* @in, align 4 %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 %mul.i = fmul <2 x float> %vecinit2.i, <float 0x401B333340000000, float 0x401B333340000000> @@ -53,7 +53,7 @@ entry: ; CHECK: vmul define void @t3() nounwind { entry: - %tmp = load float* @in, align 4, !tbaa !0 + %tmp = load float* @in, align 4 %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1 %mul.i = fmul <2 x float> %vecinit2.i, <float 0x4200000000000000, float 0x4200000000000000> @@ -67,7 +67,7 @@ entry: ; CHECK-NOT: vmul define void @t4() nounwind { entry: - %tmp = load float* @in, align 4, !tbaa !0 + %tmp = load float* @in, align 4 %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0 %vecinit2.i = insertelement <2 x 
float> %vecinit.i, float %tmp, i32 1 %mul.i = fmul <2 x float> %vecinit2.i, <float 0x41F0000000000000, float 0x41F0000000000000> @@ -81,7 +81,7 @@ entry: ; CHECK-NOT: vmul define void @t5() nounwind { entry: - %tmp = load float* @in, align 4, !tbaa !0 + %tmp = load float* @in, align 4 %vecinit.i = insertelement <4 x float> undef, float %tmp, i32 0 %vecinit2.i = insertelement <4 x float> %vecinit.i, float %tmp, i32 1 %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %tmp, i32 2 @@ -93,7 +93,3 @@ entry: } declare void @foo_int32x4_t(<4 x i32>) - -!0 = metadata !{metadata !"float", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll index 7fddbed1ed51..e6f1338b8539 100644 --- a/test/CodeGen/ARM/vdiv_combine.ll +++ b/test/CodeGen/ARM/vdiv_combine.ll @@ -11,7 +11,7 @@ declare void @foo_int32x4_t(<4 x i32>) ; CHECK-NOT: {{vdiv|vmul}} define void @t1() nounwind { entry: - %tmp = load i32* @iin, align 4, !tbaa !3 + %tmp = load i32* @iin, align 4 %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> @@ -27,7 +27,7 @@ declare void @foo_float32x2_t(<2 x float>) ; CHECK-NOT: {{vdiv|vmul}} define void @t2() nounwind { entry: - %tmp = load i32* @uin, align 4, !tbaa !3 + %tmp = load i32* @uin, align 4 %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float> @@ -41,7 +41,7 @@ entry: ; CHECK: {{vdiv|vmul}} define void @t3() nounwind { entry: - %tmp = load i32* @iin, align 4, !tbaa !3 + %tmp = load i32* @iin, align 4 %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 %vcvt.i = sitofp <2 x i32> 
%vecinit2.i to <2 x float> @@ -55,7 +55,7 @@ entry: ; CHECK: {{vdiv|vmul}} define void @t4() nounwind { entry: - %tmp = load i32* @iin, align 4, !tbaa !3 + %tmp = load i32* @iin, align 4 %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> @@ -69,7 +69,7 @@ entry: ; CHECK-NOT: {{vdiv|vmul}} define void @t5() nounwind { entry: - %tmp = load i32* @iin, align 4, !tbaa !3 + %tmp = load i32* @iin, align 4 %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0 %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1 %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float> @@ -83,7 +83,7 @@ entry: ; CHECK-NOT: {{vdiv|vmul}} define void @t6() nounwind { entry: - %tmp = load i32* @iin, align 4, !tbaa !3 + %tmp = load i32* @iin, align 4 %vecinit.i = insertelement <4 x i32> undef, i32 %tmp, i32 0 %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %tmp, i32 1 %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %tmp, i32 2 @@ -95,8 +95,3 @@ entry: } declare void @foo_float32x4_t(<4 x float>) - -!0 = metadata !{metadata !"float", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} -!3 = metadata !{metadata !"int", metadata !1} diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index 74628f0c5ce6..eb5ad8f0c3d0 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -599,3 +599,27 @@ for.end179: ; preds = %for.cond.loopexit, declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone + +; vmull lowering would create a zext(v4i8 load()) instead of a zextload(v4i8), +; creating an illegal type during legalization and causing an assert. 
+; PR15970 +define void @no_illegal_types_vmull_sext(<4 x i32> %a) { +entry: + %wide.load283.i = load <4 x i8>* undef, align 1 + %0 = sext <4 x i8> %wide.load283.i to <4 x i32> + %1 = sub nsw <4 x i32> %0, %a + %2 = mul nsw <4 x i32> %1, %1 + %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2 + store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4 + ret void +} +define void @no_illegal_types_vmull_zext(<4 x i32> %a) { +entry: + %wide.load283.i = load <4 x i8>* undef, align 1 + %0 = zext <4 x i8> %wide.load283.i to <4 x i32> + %1 = sub nsw <4 x i32> %0, %a + %2 = mul nsw <4 x i32> %1, %1 + %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2 + store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4 + ret void +} |
